From a7511fa5dacbbd552fe19a47b0a1f9330b7d3616 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 22 Sep 2023 09:15:39 +1000 Subject: [PATCH] Updated vkd3d-latest to 1.9 --- ...90d4529f27b477bf54e3a8657db2fa78c3a.patch} | 18739 ++++++++++++--- ...-46c7f65be8337a108a04a616ccd0c8a7732.patch | 19526 ---------------- 2 files changed, 15875 insertions(+), 22390 deletions(-) rename patches/vkd3d-latest/{0001-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch => 0001-Updated-vkd3d-to-90d4529f27b477bf54e3a8657db2fa78c3a.patch} (64%) delete mode 100644 patches/vkd3d-latest/0002-Updated-vkd3d-to-46c7f65be8337a108a04a616ccd0c8a7732.patch diff --git a/patches/vkd3d-latest/0001-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-90d4529f27b477bf54e3a8657db2fa78c3a.patch similarity index 64% rename from patches/vkd3d-latest/0001-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch rename to patches/vkd3d-latest/0001-Updated-vkd3d-to-90d4529f27b477bf54e3a8657db2fa78c3a.patch index b819f62e..c6aa9508 100644 --- a/patches/vkd3d-latest/0001-Update-vkd3d-to-771e442af16228a977eebba82224f06f6d02.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-90d4529f27b477bf54e3a8657db2fa78c3a.patch @@ -1,76 +1,83 @@ -From c02f983dde361302e69fb6da9526801b59e41b07 Mon Sep 17 00:00:00 2001 +From eaf7c2d83d21cbd5ecfa40ebe1eb9a92c1c323ad Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 17 May 2023 08:35:40 +1000 -Subject: [PATCH] Update vkd3d to 771e442af16228a977eebba82224f06f6d0202fe - (1.8) +Subject: [PATCH] Updated vkd3d to 90d4529f27b477bf54e3a8657db2fa78c3af5eec + (1.9) --- - libs/vkd3d/Makefile.in | 6 +- + libs/vkd3d/Makefile.in | 7 +- libs/vkd3d/include/list.h | 270 + libs/vkd3d/include/private/list.h | 270 + libs/vkd3d/include/private/rbtree.h | 378 ++ - libs/vkd3d/include/private/vkd3d_common.h | 3 +- + libs/vkd3d/include/private/vkd3d_common.h | 25 +- libs/vkd3d/include/private/vkd3d_debug.h | 
2 +- + .../include/private/vkd3d_shader_utils.h | 67 + libs/vkd3d/include/private/vkd3d_test.h | 432 ++ - libs/vkd3d/include/vkd3d.h | 1 + + libs/vkd3d/include/vkd3d.h | 37 + libs/vkd3d/include/vkd3d_d3d9types.h | 237 + libs/vkd3d/include/vkd3d_d3dcompiler.h | 74 + - libs/vkd3d/include/vkd3d_shader.h | 1 + + libs/vkd3d/include/vkd3d_shader.h | 307 +- libs/vkd3d/include/vkd3d_utils.h | 108 + - libs/vkd3d/include/vkd3d_windows.h | 284 + + libs/vkd3d/include/vkd3d_windows.h | 289 + libs/vkd3d/libs/vkd3d-common/blob.c | 1 + - libs/vkd3d/libs/vkd3d-common/debug.c | 4 +- - .../libs/vkd3d-shader/{trace.c => d3d_asm.c} | 25 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1131 +++- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 1773 +----- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 619 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 235 +- + libs/vkd3d/libs/vkd3d-common/debug.c | 21 +- + .../libs/vkd3d-shader/{trace.c => d3d_asm.c} | 75 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1521 ++++- + libs/vkd3d/libs/vkd3d-shader/dxbc.c | 1813 +----- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 2968 +++++++++ + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 896 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 295 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 8 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 2362 +++++--- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1821 ++++-- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 358 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 3815 ++++++----- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 2391 +++++-- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 799 ++- libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c | 980 --- libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c | 2531 -------- - libs/vkd3d/libs/vkd3d-shader/ir.c | 1072 ++++ + libs/vkd3d/libs/vkd3d-shader/ir.c | 1294 ++++ libs/vkd3d/libs/vkd3d-shader/preproc.h | 2 +- - libs/vkd3d/libs/vkd3d-shader/preproc.l | 6 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 1325 ++--- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 5234 +++++++++++++++++ - 
.../libs/vkd3d-shader/vkd3d_shader_main.c | 241 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 91 +- - libs/vkd3d/libs/vkd3d/command.c | 160 +- - libs/vkd3d/libs/vkd3d/device.c | 205 +- - libs/vkd3d/libs/vkd3d/resource.c | 935 ++- - libs/vkd3d/libs/vkd3d/state.c | 4 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 238 +- - 38 files changed, 14623 insertions(+), 8804 deletions(-) + libs/vkd3d/libs/vkd3d-shader/preproc.l | 146 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 1720 ++--- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 5619 +++++++++++++++++ + .../libs/vkd3d-shader/vkd3d_shader_main.c | 601 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 207 +- + libs/vkd3d/libs/vkd3d/command.c | 810 ++- + libs/vkd3d/libs/vkd3d/device.c | 440 +- + libs/vkd3d/libs/vkd3d/resource.c | 1235 ++-- + libs/vkd3d/libs/vkd3d/state.c | 18 +- + libs/vkd3d/libs/vkd3d/vkd3d_main.c | 4 +- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 356 +- + 41 files changed, 22867 insertions(+), 10202 deletions(-) create mode 100644 libs/vkd3d/include/list.h create mode 100644 libs/vkd3d/include/private/list.h create mode 100644 libs/vkd3d/include/private/rbtree.h + create mode 100644 libs/vkd3d/include/private/vkd3d_shader_utils.h create mode 100644 libs/vkd3d/include/private/vkd3d_test.h create mode 100644 libs/vkd3d/include/vkd3d_d3d9types.h create mode 100644 libs/vkd3d/include/vkd3d_d3dcompiler.h create mode 100644 libs/vkd3d/include/vkd3d_utils.h create mode 100644 libs/vkd3d/include/vkd3d_windows.h - rename libs/vkd3d/libs/vkd3d-shader/{trace.c => d3d_asm.c} (98%) + rename libs/vkd3d/libs/vkd3d-shader/{trace.c => d3d_asm.c} (97%) + create mode 100644 libs/vkd3d/libs/vkd3d-shader/dxil.c delete mode 100644 libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c delete mode 100644 libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c create mode 100644 libs/vkd3d/libs/vkd3d-shader/ir.c create mode 100644 libs/vkd3d/libs/vkd3d-shader/tpf.c diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in -index 0ed4e27ad83..1ba0e9f71e1 100644 
+index 0ed4e27ad83..f647af11d07 100644 --- a/libs/vkd3d/Makefile.in +++ b/libs/vkd3d/Makefile.in -@@ -14,6 +14,7 @@ SOURCES = \ +@@ -14,20 +14,21 @@ SOURCES = \ libs/vkd3d-common/memory.c \ libs/vkd3d-common/utf8.c \ libs/vkd3d-shader/checksum.c \ + libs/vkd3d-shader/d3d_asm.c \ libs/vkd3d-shader/d3dbc.c \ libs/vkd3d-shader/dxbc.c \ ++ libs/vkd3d-shader/dxil.c \ libs/vkd3d-shader/glsl.c \ -@@ -22,12 +23,11 @@ SOURCES = \ + libs/vkd3d-shader/hlsl.c \ + libs/vkd3d-shader/hlsl.l \ libs/vkd3d-shader/hlsl.y \ libs/vkd3d-shader/hlsl_codegen.c \ libs/vkd3d-shader/hlsl_constant_ops.c \ @@ -1022,10 +1029,26 @@ index 00000000000..b5d38bca54c + +#endif /* __WINE_WINE_RBTREE_H */ diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index 3cf0422596c..1ac23b4a085 100644 +index 3cf0422596c..f7d98f327f1 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -84,7 +84,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) +@@ -20,6 +20,7 @@ + #define __VKD3D_COMMON_H + + #include "config.h" ++#define WIN32_LEAN_AND_MEAN + #include "windows.h" + #include "vkd3d_types.h" + +@@ -28,6 +29,7 @@ + #include + #include + #include ++#include + + #ifdef _MSC_VER + #include +@@ -84,7 +86,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) { #ifdef _MSC_VER return __popcnt(v); @@ -1034,7 +1057,41 @@ index 3cf0422596c..1ac23b4a085 100644 return __builtin_popcount(v); #else v -= (v >> 1) & 0x55555555; -@@ -249,6 +249,7 @@ static inline LONG InterlockedDecrement(LONG volatile *x) +@@ -171,6 +173,11 @@ static inline bool vkd3d_bound_range(size_t start, size_t count, size_t limit) + #endif + } + ++static inline bool vkd3d_object_range_overflow(size_t start, size_t count, size_t size) ++{ ++ return (~(size_t)0 - start) / size < count; ++} ++ + static inline uint16_t vkd3d_make_u16(uint8_t low, uint8_t high) + { + return low | ((uint16_t)high << 8); +@@ -186,6 +193,21 @@ static 
inline int vkd3d_u32_compare(uint32_t x, uint32_t y) + return (x > y) - (x < y); + } + ++static inline bool bitmap_clear(uint32_t *map, unsigned int idx) ++{ ++ return map[idx >> 5] &= ~(1u << (idx & 0x1f)); ++} ++ ++static inline bool bitmap_set(uint32_t *map, unsigned int idx) ++{ ++ return map[idx >> 5] |= (1u << (idx & 0x1f)); ++} ++ ++static inline bool bitmap_is_set(const uint32_t *map, unsigned int idx) ++{ ++ return map[idx >> 5] & (1u << (idx & 0x1f)); ++} ++ + static inline int ascii_isupper(int c) + { + return 'A' <= c && c <= 'Z'; +@@ -249,6 +271,7 @@ static inline LONG InterlockedDecrement(LONG volatile *x) # else # error "InterlockedDecrement() not implemented for this platform" # endif @@ -1055,6 +1112,79 @@ index 4f6d43af12f..6708cad344f 100644 static inline const char *debugstr_guid(const GUID *guid) { +diff --git a/libs/vkd3d/include/private/vkd3d_shader_utils.h b/libs/vkd3d/include/private/vkd3d_shader_utils.h +new file mode 100644 +index 00000000000..c9f8001e590 +--- /dev/null ++++ b/libs/vkd3d/include/private/vkd3d_shader_utils.h +@@ -0,0 +1,67 @@ ++/* ++ * Copyright 2023 Conor McCarthy for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __VKD3D_SHADER_UTILS_H ++#define __VKD3D_SHADER_UTILS_H ++ ++#include "vkd3d_shader.h" ++ ++#define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') ++#define TAG_SHDR VKD3D_MAKE_TAG('S', 'H', 'D', 'R') ++#define TAG_SHEX VKD3D_MAKE_TAG('S', 'H', 'E', 'X') ++ ++static inline enum vkd3d_result vkd3d_shader_parse_dxbc_source_type(const struct vkd3d_shader_code *dxbc, ++ enum vkd3d_shader_source_type *type, char **messages) ++{ ++ struct vkd3d_shader_dxbc_desc desc; ++ enum vkd3d_result ret; ++ unsigned int i; ++ ++ *type = VKD3D_SHADER_SOURCE_NONE; ++ ++ if ((ret = vkd3d_shader_parse_dxbc(dxbc, 0, &desc, messages)) < 0) ++ return ret; ++ ++ for (i = 0; i < desc.section_count; ++i) ++ { ++ uint32_t tag = desc.sections[i].tag; ++ if (tag == TAG_SHDR || tag == TAG_SHEX) ++ { ++ *type = VKD3D_SHADER_SOURCE_DXBC_TPF; ++#ifndef VKD3D_SHADER_UNSUPPORTED_DXIL ++ break; ++#else ++ } ++ else if (tag == TAG_DXIL) ++ { ++ *type = VKD3D_SHADER_SOURCE_DXBC_DXIL; ++ /* Default to DXIL if both are present. 
*/ ++ break; ++#endif ++ } ++ } ++ ++ vkd3d_shader_free_dxbc(&desc); ++ ++ if (*type == VKD3D_SHADER_SOURCE_NONE) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ return VKD3D_OK; ++} ++ ++#endif /* __VKD3D_SHADER_UTILS_H */ diff --git a/libs/vkd3d/include/private/vkd3d_test.h b/libs/vkd3d/include/private/vkd3d_test.h new file mode 100644 index 00000000000..081443c4fa6 @@ -1494,17 +1624,61 @@ index 00000000000..081443c4fa6 + +#endif /* __VKD3D_TEST_H */ diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h -index ff2b15c51dc..72ed3ced671 100644 +index ff2b15c51dc..0ddba9cc0a1 100644 --- a/libs/vkd3d/include/vkd3d.h +++ b/libs/vkd3d/include/vkd3d.h -@@ -76,6 +76,7 @@ enum vkd3d_api_version +@@ -76,6 +76,8 @@ enum vkd3d_api_version VKD3D_API_VERSION_1_5, VKD3D_API_VERSION_1_6, VKD3D_API_VERSION_1_7, + VKD3D_API_VERSION_1_8, ++ VKD3D_API_VERSION_1_9, VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION), }; +@@ -206,7 +208,42 @@ VKD3D_API VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device); + VKD3D_API struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device); + + VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue); ++ ++/** ++ * Acquire the Vulkan queue backing a command queue. ++ * ++ * While a queue is acquired by the client, it is locked so that ++ * neither the vkd3d library nor other threads can submit work to ++ * it. For that reason it should be released as soon as possible with ++ * vkd3d_release_vk_queue(). The lock is not reentrant, so the same ++ * queue must not be acquired more than once by the same thread. ++ * ++ * Work submitted through the Direct3D 12 API exposed by vkd3d is not ++ * always immediately submitted to the Vulkan queue; sometimes it is ++ * kept in another internal queue, which might not necessarily be ++ * empty at the time vkd3d_acquire_vk_queue() is called. 
For this ++ * reason, work submitted directly to the Vulkan queue might appear to ++ * the Vulkan driver as being submitted before other work submitted ++ * though the Direct3D 12 API. If this is not desired, it is ++ * recommended to synchronize work submission using an ID3D12Fence ++ * object, by submitting to the queue a signal operation after all the ++ * Direct3D 12 work is submitted and waiting for it before calling ++ * vkd3d_acquire_vk_queue(). ++ * ++ * \since 1.0 ++ */ + VKD3D_API VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue); ++ ++/** ++ * Release the Vulkan queue backing a command queue. ++ * ++ * This must be paired to an earlier corresponding ++ * vkd3d_acquire_vk_queue(). After this function is called, the Vulkan ++ * queue returned by vkd3d_acquire_vk_queue() must not be used any ++ * more. ++ * ++ * \since 1.0 ++ */ + VKD3D_API void vkd3d_release_vk_queue(ID3D12CommandQueue *queue); + + VKD3D_API HRESULT vkd3d_create_image_resource(ID3D12Device *device, diff --git a/libs/vkd3d/include/vkd3d_d3d9types.h b/libs/vkd3d/include/vkd3d_d3d9types.h new file mode 100644 index 00000000000..75d0461409d @@ -1829,17 +2003,452 @@ index 00000000000..c934835dc0a +#endif /* __D3DCOMPILER_H__ */ +#endif /* __VKD3D_D3DCOMPILER_H */ diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 859b8c79792..274241546ea 100644 +index 859b8c79792..01356ce3931 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -49,6 +49,7 @@ enum vkd3d_shader_api_version +@@ -49,6 +49,8 @@ enum vkd3d_shader_api_version VKD3D_SHADER_API_VERSION_1_5, VKD3D_SHADER_API_VERSION_1_6, VKD3D_SHADER_API_VERSION_1_7, + VKD3D_SHADER_API_VERSION_1_8, ++ VKD3D_SHADER_API_VERSION_1_9, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION), }; +@@ -84,6 +86,16 @@ enum vkd3d_shader_structure_type + * \since 1.3 + */ + VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO, ++ /** ++ * The structure is a 
vkd3d_shader_scan_signature_info structure. ++ * \since 1.9 ++ */ ++ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO, ++ /** ++ * The structure is a vkd3d_shader_varying_map_info structure. ++ * \since 1.9 ++ */ ++ VKD3D_SHADER_STRUCTURE_TYPE_VARYING_MAP_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), + }; +@@ -133,6 +145,15 @@ enum vkd3d_shader_compile_option_formatting_flags + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), + }; + ++/** Determines how matrices are stored. \since 1.9 */ ++enum vkd3d_shader_compile_option_pack_matrix_order ++{ ++ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR = 0x00000001, ++ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR = 0x00000002, ++ ++ VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER), ++}; ++ + enum vkd3d_shader_compile_option_name + { + /** +@@ -163,6 +184,15 @@ enum vkd3d_shader_compile_option_name + * \since 1.7 + */ + VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE = 0x00000006, ++ /** ++ * This option specifies default matrix packing order for HLSL sources. ++ * Explicit variable modifiers or pragmas will take precedence. ++ * ++ * \a value is a member of enum vkd3d_shader_compile_option_pack_matrix_order. ++ * ++ * \since 1.9 ++ */ ++ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER = 0x00000007, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), + }; +@@ -326,6 +356,25 @@ struct vkd3d_shader_parameter + } u; + }; + ++/** ++ * Symbolic register indices for mapping uniform constant register sets in ++ * legacy Direct3D bytecode to constant buffer views in the target environment. ++ * ++ * Members of this enumeration are used in ++ * \ref vkd3d_shader_resource_binding.register_index. ++ * ++ * \since 1.9 ++ */ ++enum vkd3d_shader_d3dbc_constant_register ++{ ++ /** The float constant register set, c# in Direct3D assembly. 
*/ ++ VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER = 0x0, ++ /** The integer constant register set, i# in Direct3D assembly. */ ++ VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER = 0x1, ++ /** The boolean constant register set, b# in Direct3D assembly. */ ++ VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER = 0x2, ++}; ++ + /** + * Describes the mapping of a single resource or resource array to its binding + * point in the target environment. +@@ -350,7 +399,14 @@ struct vkd3d_shader_resource_binding + * support multiple register spaces, this parameter must be set to 0. + */ + unsigned int register_space; +- /** Register index of the DXBC resource. */ ++ /** ++ * Register index of the Direct3D resource. ++ * ++ * For legacy Direct3D shaders, vkd3d-shader maps each constant register ++ * set to a single constant buffer view. This parameter names the register ++ * set to map, and must be a member of ++ * enum vkd3d_shader_d3dbc_constant_register. ++ */ + unsigned int register_index; + /** Shader stage(s) to which the resource is visible. */ + enum vkd3d_shader_visibility shader_visibility; +@@ -610,6 +666,11 @@ enum vkd3d_shader_source_type + * model 1, 2, and 3 shaders. \since 1.3 + */ + VKD3D_SHADER_SOURCE_D3D_BYTECODE, ++ /** ++ * A 'DirectX Intermediate Language' shader embedded in a DXBC container. This is ++ * the format used for Direct3D shader model 6 shaders. \since 1.9 ++ */ ++ VKD3D_SHADER_SOURCE_DXBC_DXIL, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), + }; +@@ -619,7 +680,7 @@ enum vkd3d_shader_target_type + { + /** + * The shader has no type or is to be ignored. This is not a valid value +- * for vkd3d_shader_compile() or vkd3d_shader_scan(). ++ * for vkd3d_shader_compile(). + */ + VKD3D_SHADER_TARGET_NONE, + /** +@@ -1280,6 +1341,8 @@ enum vkd3d_shader_descriptor_info_flag + /** The descriptor is a UAV resource, on which the shader performs + * atomic ops. 
\since 1.6 */ + VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS = 0x00000008, ++ /** The descriptor is a raw (byte-addressed) buffer. \since 1.9 */ ++ VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER = 0x00000010, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_DESCRIPTOR_INFO_FLAG), + }; +@@ -1319,6 +1382,20 @@ struct vkd3d_shader_descriptor_info + * A chained structure enumerating the descriptors declared by a shader. + * + * This structure extends vkd3d_shader_compile_info. ++ * ++ * When scanning a legacy Direct3D shader, vkd3d-shader enumerates each ++ * constant register set used by the shader as a single constant buffer ++ * descriptor, as follows: ++ * - The \ref vkd3d_shader_descriptor_info.type field is set to ++ * VKD3D_SHADER_DESCRIPTOR_TYPE_CBV. ++ * - The \ref vkd3d_shader_descriptor_info.register_space field is set to zero. ++ * - The \ref vkd3d_shader_descriptor_info.register_index field is set to a ++ * member of enum vkd3d_shader_d3dbc_constant_register denoting which set ++ * is used. ++ * - The \ref vkd3d_shader_descriptor_info.count field is set to one. ++ * ++ * In summary, there may be up to three such descriptors, one for each register ++ * set used by the shader: float, integer, and boolean. + */ + struct vkd3d_shader_scan_descriptor_info + { +@@ -1388,6 +1465,24 @@ enum vkd3d_shader_sysval_semantic + VKD3D_SHADER_SV_TESS_FACTOR_TRIINT = 0x0e, + VKD3D_SHADER_SV_TESS_FACTOR_LINEDET = 0x0f, + VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN = 0x10, ++ /** Render target; SV_Target in Direct3D. \since 1.9 */ ++ VKD3D_SHADER_SV_TARGET = 0x40, ++ /** Depth; SV_Depth in Direct3D. \since 1.9 */ ++ VKD3D_SHADER_SV_DEPTH = 0x41, ++ /** Sample mask; SV_Coverage in Direct3D. \since 1.9 */ ++ VKD3D_SHADER_SV_COVERAGE = 0x42, ++ /** ++ * Depth, which is guaranteed to be greater than or equal to the current ++ * depth; SV_DepthGreaterEqual in Direct3D. 
\since 1.9 ++ */ ++ VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL = 0x43, ++ /** ++ * Depth, which is guaranteed to be less than or equal to the current ++ * depth; SV_DepthLessEqual in Direct3D. \since 1.9 ++ */ ++ VKD3D_SHADER_SV_DEPTH_LESS_EQUAL = 0x44, ++ /** Stencil reference; SV_StencilRef in Direct3D. \since 1.9 */ ++ VKD3D_SHADER_SV_STENCIL_REF = 0x45, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SYSVAL_SEMANTIC), + }; +@@ -1550,6 +1645,132 @@ static inline uint32_t vkd3d_shader_create_swizzle(enum vkd3d_shader_swizzle_com + | ((w & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(3)); + } + ++/** ++ * A chained structure containing descriptions of shader inputs and outputs. ++ * ++ * This structure is currently implemented only for DXBC and legacy D3D bytecode ++ * source types. ++ * For DXBC shaders, the returned information is parsed directly from the ++ * signatures embedded in the DXBC shader. ++ * For legacy D3D shaders, the returned information is synthesized based on ++ * registers declared or used by shader instructions. ++ * For all other shader types, the structure is zeroed. ++ * ++ * All members (except for \ref type and \ref next) are output-only. ++ * ++ * This structure is passed to vkd3d_shader_scan() and extends ++ * vkd3d_shader_compile_info. ++ * ++ * Members of this structure are allocated by vkd3d-shader and should be freed ++ * with vkd3d_shader_free_scan_signature_info() when no longer needed. ++ * ++ * All signatures may contain pointers into the input shader, and should only ++ * be accessed while the input shader remains valid. ++ * ++ * Signature elements are synthesized from legacy Direct3D bytecode as follows: ++ * - The \ref vkd3d_shader_signature_element.semantic_name field is set to an ++ * uppercase string corresponding to the HLSL name for the usage, e.g. ++ * "POSITION", "BLENDWEIGHT", "COLOR", "PSIZE", etc. ++ * - The \ref vkd3d_shader_signature_element.semantic_index field is set to the ++ * usage index. 
++ * - The \ref vkd3d_shader_signature_element.stream_index is always 0. ++ * ++ * Signature elements are synthesized for any input or output register declared ++ * or used in a legacy Direct3D bytecode shader, including the following: ++ * - Shader model 1 and 2 colour and texture coordinate registers. ++ * - The shader model 1 pixel shader output register. ++ * - Shader model 1 and 2 vertex shader output registers (position, fog, and ++ * point size). ++ * - Shader model 3 pixel shader system value input registers (pixel position ++ * and face). ++ * ++ * \since 1.9 ++ */ ++struct vkd3d_shader_scan_signature_info ++{ ++ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO. */ ++ enum vkd3d_shader_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ ++ const void *next; ++ ++ /** The shader input varyings. */ ++ struct vkd3d_shader_signature input; ++ ++ /** The shader output varyings. */ ++ struct vkd3d_shader_signature output; ++ ++ /** The shader patch constant varyings. */ ++ struct vkd3d_shader_signature patch_constant; ++}; ++ ++/** ++ * Describes the mapping of a output varying register in a shader stage, ++ * to an input varying register in the following shader stage. ++ * ++ * This structure is used in struct vkd3d_shader_varying_map_info. ++ */ ++struct vkd3d_shader_varying_map ++{ ++ /** ++ * The signature index (in the output signature) of the output varying. ++ * If greater than or equal to the number of elements in the output ++ * signature, signifies that the varying is consumed by the next stage but ++ * not written by this one. ++ */ ++ unsigned int output_signature_index; ++ /** The register index of the input varying to map this register to. */ ++ unsigned int input_register_index; ++ /** The mask consumed by the destination register. 
*/ ++ unsigned int input_mask; ++}; ++ ++/** ++ * A chained structure which describes how output varyings in this shader stage ++ * should be mapped to input varyings in the next stage. ++ * ++ * This structure is optional. It should not be provided if there is no shader ++ * stage. ++ * However, depending on the input and output formats, this structure may be ++ * necessary in order to generate shaders which correctly match each other. ++ * ++ * If this structure is absent, vkd3d-shader will map varyings from one stage ++ * to another based on their register index. ++ * For Direct3D shader model 3.0, such a default mapping will be incorrect ++ * unless the registers are allocated in the same order, and hence this ++ * field is necessary to correctly match inter-stage varyings. ++ * This mapping may also be necessary under other circumstances where the ++ * varying interface does not match exactly. ++ * ++ * This structure is passed to vkd3d_shader_compile() and extends ++ * vkd3d_shader_compile_info. ++ * ++ * This structure contains only input parameters. ++ * ++ * \since 1.9 ++ */ ++struct vkd3d_shader_varying_map_info ++{ ++ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_VARYING_MAP_INFO. */ ++ enum vkd3d_shader_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ ++ const void *next; ++ ++ /** ++ * A mapping of output varyings in this shader stage to input varyings ++ * in the next shader stage. ++ * ++ * This mapping should include exactly one element for each varying ++ * consumed by the next shader stage. ++ * If this shader stage outputs a varying that is not consumed by the next ++ * shader stage, that varying should be absent from this array. ++ * ++ * This mapping may be constructed by vkd3d_shader_build_varying_map(). ++ */ ++ const struct vkd3d_shader_varying_map *varying_map; ++ /** The number of registers provided in \ref varying_map. 
*/ ++ unsigned int varying_count; ++}; ++ + #ifdef LIBVKD3D_SHADER_SOURCE + # define VKD3D_SHADER_API VKD3D_EXPORT + #else +@@ -1622,12 +1843,14 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported + * + * Depending on the source and target types, this function may support the + * following chained structures: ++ * - vkd3d_shader_hlsl_source_info + * - vkd3d_shader_interface_info ++ * - vkd3d_shader_varying_map_info + * - vkd3d_shader_scan_descriptor_info ++ * - vkd3d_shader_scan_signature_info + * - vkd3d_shader_spirv_domain_shader_target_info + * - vkd3d_shader_spirv_target_info + * - vkd3d_shader_transform_feedback_info +- * - vkd3d_shader_hlsl_source_info + * + * \param compile_info A chained structure containing compilation parameters. + * +@@ -1783,6 +2006,26 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver + * Parse shader source code or byte code, returning various types of requested + * information. + * ++ * The \a source_type member of \a compile_info must be set to the type of the ++ * shader. ++ * ++ * The \a target_type member may be set to VKD3D_SHADER_TARGET_NONE, in which ++ * case vkd3d_shader_scan() will return information about the shader in ++ * isolation. Alternatively, it may be set to a valid compilation target for the ++ * shader, in which case vkd3d_shader_scan() will return information that ++ * reflects the interface for a shader as it will be compiled to that target. ++ * In this case other chained structures may be appended to \a compile_info as ++ * they would be passed to vkd3d_shader_compile(), and interpreted accordingly, ++ * such as vkd3d_shader_spirv_target_info. ++ * ++ * (For a hypothetical example, suppose the source shader distinguishes float ++ * and integer texture data, but the target environment does not support integer ++ * textures. In this case vkd3d_shader_compile() might translate integer ++ * operations to float. 
Accordingly using VKD3D_SHADER_TARGET_NONE would ++ * accurately report whether the texture expects integer or float data, but ++ * using the relevant specific target type would report ++ * VKD3D_SHADER_RESOURCE_DATA_FLOAT.) ++ * + * Currently this function supports the following code types: + * - VKD3D_SHADER_SOURCE_DXBC_TPF + * +@@ -1790,6 +2033,7 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver + * \n + * The DXBC_TPF scanner supports the following chained structures: + * - vkd3d_shader_scan_descriptor_info ++ * - vkd3d_shader_scan_signature_info + * \n + * Although the \a compile_info parameter is read-only, chained structures + * passed to this function need not be, and may serve as output parameters, +@@ -1826,12 +2070,18 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_descriptor_info( + struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info); + + /** +- * Read the input signature of a compiled shader, returning a structural ++ * Read the input signature of a compiled DXBC shader, returning a structural + * description which can be easily parsed by C code. + * + * This function parses a compiled shader. To parse a standalone root signature, + * use vkd3d_shader_parse_root_signature(). + * ++ * This function only parses DXBC shaders, and only retrieves the input ++ * signature. To retrieve signatures from other shader types, or other signature ++ * types, use vkd3d_shader_scan() and struct vkd3d_shader_scan_signature_info. ++ * This function returns the same input signature that is returned in ++ * struct vkd3d_shader_scan_signature_info. ++ * + * \param dxbc Compiled byte code, in DXBC format. 
+ * + * \param signature Output location in which the parsed root signature will be +@@ -2021,6 +2271,48 @@ VKD3D_SHADER_API int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxb + VKD3D_SHADER_API int vkd3d_shader_serialize_dxbc(size_t section_count, + const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); + ++/** ++ * Free members of struct vkd3d_shader_scan_signature_info allocated by ++ * vkd3d_shader_scan(). ++ * ++ * This function may free members of vkd3d_shader_scan_signature_info, but ++ * does not free the structure itself. ++ * ++ * \param info Scan information to free. ++ * ++ * \since 1.9 ++ */ ++VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info); ++ ++/** ++ * Build a mapping of output varyings in a shader stage to input varyings in ++ * the following shader stage. ++ * ++ * This mapping should be used in struct vkd3d_shader_varying_map_info to ++ * compile the first shader. ++ * ++ * \param output_signature The output signature of the first shader. ++ * ++ * \param input_signature The input signature of the second shader. ++ * ++ * \param count On output, contains the number of entries written into ++ * \ref varyings. ++ * ++ * \param varyings Pointer to an output array of varyings. ++ * This must point to space for N varyings, where N is the number of elements ++ * in the input signature. ++ * ++ * \remark Valid legacy Direct3D pixel shaders have at most 12 varying inputs: ++ * 10 inter-stage varyings, face, and position. ++ * Therefore, in practice, it is safe to call this function with a ++ * pre-allocated array with a fixed size of 12. 
++ * ++ * \since 1.9 ++ */ ++VKD3D_SHADER_API void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, ++ const struct vkd3d_shader_signature *input_signature, ++ unsigned int *count, struct vkd3d_shader_varying_map *varyings); ++ + #endif /* VKD3D_SHADER_NO_PROTOTYPES */ + + /** Type of vkd3d_shader_get_version(). */ +@@ -2086,6 +2378,13 @@ typedef int (*PFN_vkd3d_shader_parse_dxbc)(const struct vkd3d_shader_code *dxbc, + typedef int (*PFN_vkd3d_shader_serialize_dxbc)(size_t section_count, + const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); + ++/** Type of vkd3d_shader_build_varying_map(). \since 1.9 */ ++typedef void (*PFN_vkd3d_shader_build_varying_map)(const struct vkd3d_shader_signature *output_signature, ++ const struct vkd3d_shader_signature *input_signature, ++ unsigned int *count, struct vkd3d_shader_varying_map *varyings); ++/** Type of vkd3d_shader_free_scan_signature_info(). \since 1.9 */ ++typedef void (*PFN_vkd3d_shader_free_scan_signature_info)(struct vkd3d_shader_scan_signature_info *info); ++ + #ifdef __cplusplus + } + #endif /* __cplusplus */ diff --git a/libs/vkd3d/include/vkd3d_utils.h b/libs/vkd3d/include/vkd3d_utils.h new file mode 100644 index 00000000000..e8462563576 @@ -1956,10 +2565,10 @@ index 00000000000..e8462563576 +#endif /* __VKD3D_UTILS_H */ diff --git a/libs/vkd3d/include/vkd3d_windows.h b/libs/vkd3d/include/vkd3d_windows.h new file mode 100644 -index 00000000000..002ff667cbc +index 00000000000..7b0e972d828 --- /dev/null +++ b/libs/vkd3d/include/vkd3d_windows.h -@@ -0,0 +1,284 @@ +@@ -0,0 +1,289 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * @@ -2026,6 +2635,7 @@ index 00000000000..002ff667cbc + +# define DXGI_ERROR_NOT_FOUND _HRESULT_TYPEDEF_(0x887a0002) +# define DXGI_ERROR_MORE_DATA _HRESULT_TYPEDEF_(0x887a0003) ++# define DXGI_ERROR_UNSUPPORTED _HRESULT_TYPEDEF_(0x887a0004) + +# define D3DERR_INVALIDCALL 
_HRESULT_TYPEDEF_(0x8876086c) + @@ -2086,6 +2696,8 @@ index 00000000000..002ff667cbc +# endif + +typedef GUID IID; ++typedef GUID CLSID; ++typedef GUID UUID; + +# ifdef INITGUID +# ifndef __cplusplus @@ -2184,9 +2796,11 @@ index 00000000000..002ff667cbc +# if defined(__cplusplus) && !defined(CINTERFACE) +# define REFIID const IID & +# define REFGUID const GUID & ++# define REFCLSID const CLSID & +# else +# define REFIID const IID * const +# define REFGUID const GUID * const ++# define REFCLSID const CLSID * const +# endif + +#if defined(__cplusplus) && !defined(CINTERFACE) @@ -2257,10 +2871,18 @@ index 30205088b1b..ce00e536d39 100644 #include "vkd3d_blob.h" #include "vkd3d_debug.h" diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c -index 499334a35f1..b363efbd360 100644 +index 499334a35f1..aa7df5bd764 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c +++ b/libs/vkd3d/libs/vkd3d-common/debug.c -@@ -40,9 +40,9 @@ +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + #ifdef HAVE_PTHREAD_H + #include + #endif +@@ -40,15 +41,15 @@ #define VKD3D_DEBUG_BUFFER_COUNT 64 #define VKD3D_DEBUG_BUFFER_SIZE 512 @@ -2270,13 +2892,38 @@ index 499334a35f1..b363efbd360 100644 -static const char *debug_level_names[] = +static const char *const debug_level_names[] = { - /* VKD3D_DBG_LEVEL_NONE */ "none", - /* VKD3D_DBG_LEVEL_ERR */ "err", +- /* VKD3D_DBG_LEVEL_NONE */ "none", +- /* VKD3D_DBG_LEVEL_ERR */ "err", +- /* VKD3D_DBG_LEVEL_FIXME */ "fixme", +- /* VKD3D_DBG_LEVEL_WARN */ "warn", +- /* VKD3D_DBG_LEVEL_TRACE */ "trace", ++ [VKD3D_DBG_LEVEL_NONE ] = "none", ++ [VKD3D_DBG_LEVEL_ERR ] = "err", ++ [VKD3D_DBG_LEVEL_FIXME] = "fixme", ++ [VKD3D_DBG_LEVEL_WARN ] = "warn", ++ [VKD3D_DBG_LEVEL_TRACE] = "trace", + }; + + enum vkd3d_dbg_level vkd3d_dbg_get_level(void) +@@ -105,7 +106,13 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch + + assert(level < ARRAY_SIZE(debug_level_names)); + ++#ifdef _WIN32 ++ 
vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); ++#elif HAVE_GETTID ++ vkd3d_dbg_output("vkd3d:%u:%s:%s ", gettid(), debug_level_names[level], function); ++#else + vkd3d_dbg_output("vkd3d:%s:%s ", debug_level_names[level], function); ++#endif + va_start(args, fmt); + vkd3d_dbg_voutput(fmt, args); + va_end(args); diff --git a/libs/vkd3d/libs/vkd3d-shader/trace.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -similarity index 98% +similarity index 97% rename from libs/vkd3d/libs/vkd3d-shader/trace.c rename to libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 6cd2dcb270c..0a821b5c878 100644 +index 6cd2dcb270c..f0c386f1b3a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/trace.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -109,6 +109,7 @@ static const char * const shader_opcode_names[] = @@ -2287,6 +2934,68 @@ index 6cd2dcb270c..0a821b5c878 100644 [VKD3DSIH_DIV ] = "div", [VKD3DSIH_DLT ] = "dlt", [VKD3DSIH_DMAX ] = "dmax", +@@ -577,17 +578,17 @@ static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, e + { + static const char *const resource_type_names[] = + { +- /* VKD3D_SHADER_RESOURCE_NONE */ "none", +- /* VKD3D_SHADER_RESOURCE_BUFFER */ "buffer", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ "texture1d", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ "texture2d", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ "texture2dms", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ "texture3d", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ "texturecube", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ "texture1darray", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ "texture2darray", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ "texture2dmsarray", +- /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ "texturecubearray", ++ [VKD3D_SHADER_RESOURCE_NONE ] = "none", ++ [VKD3D_SHADER_RESOURCE_BUFFER ] = "buffer", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_1D ] = "texture1d", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2D ] = "texture2d", ++ 
[VKD3D_SHADER_RESOURCE_TEXTURE_2DMS ] = "texture2dms", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_3D ] = "texture3d", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE ] = "texturecube", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY ] = "texture1darray", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY ] = "texture2darray", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = "texture2dmsarray", ++ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = "texturecubearray", + }; + + if (type < ARRAY_SIZE(resource_type_names)) +@@ -600,19 +601,19 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, const + { + static const char *const data_type_names[] = + { +- /* VKD3D_DATA_FLOAT */ "float", +- /* VKD3D_DATA_INT */ "int", +- /* VKD3D_DATA_RESOURCE */ "resource", +- /* VKD3D_DATA_SAMPLER */ "sampler", +- /* VKD3D_DATA_UAV */ "uav", +- /* VKD3D_DATA_UINT */ "uint", +- /* VKD3D_DATA_UNORM */ "unorm", +- /* VKD3D_DATA_SNORM */ "snorm", +- /* VKD3D_DATA_OPAQUE */ "opaque", +- /* VKD3D_DATA_MIXED */ "mixed", +- /* VKD3D_DATA_DOUBLE */ "double", +- /* VKD3D_DATA_CONTINUED */ "", +- /* VKD3D_DATA_UNUSED */ "", ++ [VKD3D_DATA_FLOAT ] = "float", ++ [VKD3D_DATA_INT ] = "int", ++ [VKD3D_DATA_RESOURCE ] = "resource", ++ [VKD3D_DATA_SAMPLER ] = "sampler", ++ [VKD3D_DATA_UAV ] = "uav", ++ [VKD3D_DATA_UINT ] = "uint", ++ [VKD3D_DATA_UNORM ] = "unorm", ++ [VKD3D_DATA_SNORM ] = "snorm", ++ [VKD3D_DATA_OPAQUE ] = "opaque", ++ [VKD3D_DATA_MIXED ] = "mixed", ++ [VKD3D_DATA_DOUBLE ] = "double", ++ [VKD3D_DATA_CONTINUED] = "", ++ [VKD3D_DATA_UNUSED ] = "", + }; + const char *name; + int i; @@ -645,7 +646,7 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, break; @@ -2307,6 +3016,15 @@ index 6cd2dcb270c..0a821b5c878 100644 shader_dump_resource_type(compiler, semantic->resource_type); if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) +@@ -712,7 +714,7 @@ static void shader_dump_decl_usage(struct 
vkd3d_d3d_asm_compiler *compiler, + break; + + case VKD3D_DECL_USAGE_TEXCOORD: +- shader_addline(buffer, "texture%u", semantic->usage_idx); ++ shader_addline(buffer, "texcoord%u", semantic->usage_idx); + break; + + case VKD3D_DECL_USAGE_TANGENT: @@ -1505,9 +1507,9 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile { case VKD3DSIH_BREAKP: @@ -2370,7 +3088,7 @@ index 6cd2dcb270c..0a821b5c878 100644 end = (const char *)code.code + code.size; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index ed81137d225..712613ac13b 100644 +index ed81137d225..1fd5ab2446d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -1,4 +1,6 @@ @@ -2397,7 +3115,7 @@ index ed81137d225..712613ac13b 100644 #define VKD3D_SM1_VS 0xfffeu #define VKD3D_SM1_PS 0xffffu -@@ -207,7 +210,7 @@ struct vkd3d_sm1_opcode_info +@@ -207,10 +210,13 @@ struct vkd3d_sm1_opcode_info struct vkd3d_shader_sm1_parser { const struct vkd3d_sm1_opcode_info *opcode_table; @@ -2406,7 +3124,37 @@ index ed81137d225..712613ac13b 100644 bool abort; struct vkd3d_shader_parser p; -@@ -462,6 +465,7 @@ static void shader_sm1_parse_src_param(uint32_t param, const struct vkd3d_shader ++ ++#define MAX_CONSTANT_COUNT 8192 ++ uint32_t constant_def_mask[3][MAX_CONSTANT_COUNT / 32]; + }; + + /* This table is not order or position dependent. 
*/ +@@ -257,9 +263,9 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = + /* Declarations */ + {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, + /* Constant definitions */ +- {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, ++ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, +- {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, ++ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, + /* Control flow */ + {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, +@@ -324,9 +330,9 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = + /* Declarations */ + {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, + /* Constant definitions */ +- {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, ++ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, +- {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, ++ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, + /* Control flow */ + {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, + {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, +@@ -462,6 +468,7 @@ static void shader_sm1_parse_src_param(uint32_t param, const struct vkd3d_shader src->reg.idx[1].rel_addr = NULL; src->reg.idx[2].offset = ~0u; src->reg.idx[2].rel_addr = NULL; @@ -2414,7 +3162,7 @@ index ed81137d225..712613ac13b 100644 src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; } -@@ -480,6 +484,7 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader +@@ -480,11 +487,315 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader dst->reg.idx[1].rel_addr = NULL; dst->reg.idx[2].offset = ~0u; dst->reg.idx[2].rel_addr = NULL; @@ -2422,7 +3170,324 @@ index ed81137d225..712613ac13b 100644 dst->write_mask = (param & VKD3D_SM1_WRITEMASK_MASK) >> VKD3D_SM1_WRITEMASK_SHIFT; dst->modifiers 
= (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT; dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; -@@ -661,6 +666,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const + } + ++static struct signature_element *find_signature_element(const struct shader_signature *signature, ++ const char *semantic_name, unsigned int semantic_index) ++{ ++ struct signature_element *e = signature->elements; ++ unsigned int i; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ if (!ascii_strcasecmp(e[i].semantic_name, semantic_name) ++ && e[i].semantic_index == semantic_index) ++ return &e[i]; ++ } ++ ++ return NULL; ++} ++ ++static struct signature_element *find_signature_element_by_register_index( ++ const struct shader_signature *signature, unsigned int register_index) ++{ ++ struct signature_element *e = signature->elements; ++ unsigned int i; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ if (e[i].register_index == register_index) ++ return &e[i]; ++ } ++ ++ return NULL; ++} ++ ++#define SM1_COLOR_REGISTER_OFFSET 8 ++ ++static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, ++ const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, ++ unsigned int register_index, bool is_dcl, unsigned int mask) ++{ ++ struct shader_signature *signature; ++ struct signature_element *element; ++ ++ if (output) ++ signature = &sm1->p.shader_desc.output_signature; ++ else ++ signature = &sm1->p.shader_desc.input_signature; ++ ++ if ((element = find_signature_element(signature, name, index))) ++ { ++ element->mask |= mask; ++ if (!is_dcl) ++ element->used_mask |= mask; ++ return true; ++ } ++ ++ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, ++ signature->element_count + 1, sizeof(*signature->elements))) ++ return false; ++ element = &signature->elements[signature->element_count++]; ++ ++ 
element->semantic_name = name; ++ element->semantic_index = index; ++ element->stream_index = 0; ++ element->sysval_semantic = sysval; ++ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ element->register_index = register_index; ++ element->target_location = register_index; ++ element->register_count = 1; ++ element->mask = mask; ++ element->used_mask = is_dcl ? 0 : mask; ++ element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; ++ ++ return true; ++} ++ ++static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, ++ unsigned int register_index, unsigned int mask) ++{ ++ struct shader_signature *signature; ++ struct signature_element *element; ++ ++ if (output) ++ signature = &sm1->p.shader_desc.output_signature; ++ else ++ signature = &sm1->p.shader_desc.input_signature; ++ ++ if (!(element = find_signature_element_by_register_index(signature, register_index))) ++ { ++ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC, ++ "%s register %u was used without being declared.", output ? "Output" : "Input", register_index); ++ return; ++ } ++ ++ element->used_mask |= mask; ++} ++ ++static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, ++ const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) ++{ ++ unsigned int register_index = reg->idx[0].offset; ++ ++ switch (reg->type) ++ { ++ case VKD3DSPR_TEMP: ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL ++ && sm1->p.shader_version.major == 1 && !register_index) ++ return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_TARGET, 0, is_dcl, mask); ++ return true; ++ ++ case VKD3DSPR_INPUT: ++ /* For vertex shaders or sm3 pixel shaders, we should have already ++ * had a DCL instruction. Otherwise, this is a colour input. 
*/ ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX || sm1->p.shader_version.major == 3) ++ { ++ add_signature_mask(sm1, false, register_index, mask); ++ return true; ++ } ++ return add_signature_element(sm1, false, "COLOR", register_index, ++ VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); ++ ++ case VKD3DSPR_TEXTURE: ++ /* For vertex shaders, this is ADDR. */ ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) ++ return true; ++ return add_signature_element(sm1, false, "TEXCOORD", register_index, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); ++ ++ case VKD3DSPR_OUTPUT: ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) ++ { ++ /* For sm < 2 vertex shaders, this is TEXCRDOUT. ++ * ++ * For sm3 vertex shaders, this is OUTPUT, but we already ++ * should have had a DCL instruction. */ ++ if (sm1->p.shader_version.major == 3) ++ { ++ add_signature_mask(sm1, true, register_index, mask); ++ return true; ++ } ++ return add_signature_element(sm1, true, "TEXCOORD", register_index, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); ++ } ++ /* fall through */ ++ ++ case VKD3DSPR_ATTROUT: ++ return add_signature_element(sm1, true, "COLOR", register_index, ++ VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); ++ ++ case VKD3DSPR_COLOROUT: ++ return add_signature_element(sm1, true, "COLOR", register_index, ++ VKD3D_SHADER_SV_TARGET, register_index, is_dcl, mask); ++ ++ case VKD3DSPR_DEPTHOUT: ++ return add_signature_element(sm1, true, "DEPTH", 0, ++ VKD3D_SHADER_SV_DEPTH, register_index, is_dcl, 0x1); ++ ++ case VKD3DSPR_RASTOUT: ++ switch (register_index) ++ { ++ case 0: ++ return add_signature_element(sm1, true, "POSITION", 0, ++ VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); ++ ++ case 1: ++ return add_signature_element(sm1, true, "FOG", 0, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); ++ ++ case 2: ++ return add_signature_element(sm1, true, 
"PSIZE", 0, ++ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); ++ ++ default: ++ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, ++ "Invalid rasterizer output index %u.", register_index); ++ return true; ++ } ++ ++ case VKD3DSPR_MISCTYPE: ++ switch (register_index) ++ { ++ case 0: ++ return add_signature_element(sm1, false, "VPOS", 0, ++ VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); ++ ++ case 1: ++ return add_signature_element(sm1, false, "VFACE", 0, ++ VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1); ++ ++ default: ++ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, ++ "Invalid miscellaneous fragment input index %u.", register_index); ++ return true; ++ } ++ ++ default: ++ return true; ++ } ++} ++ ++static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, ++ const struct vkd3d_shader_semantic *semantic) ++{ ++ const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; ++ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; ++ unsigned int mask = semantic->resource.reg.write_mask; ++ bool output; ++ ++ static const char sm1_semantic_names[][13] = ++ { ++ [VKD3D_DECL_USAGE_POSITION ] = "POSITION", ++ [VKD3D_DECL_USAGE_BLEND_WEIGHT ] = "BLENDWEIGHT", ++ [VKD3D_DECL_USAGE_BLEND_INDICES] = "BLENDINDICES", ++ [VKD3D_DECL_USAGE_NORMAL ] = "NORMAL", ++ [VKD3D_DECL_USAGE_PSIZE ] = "PSIZE", ++ [VKD3D_DECL_USAGE_TEXCOORD ] = "TEXCOORD", ++ [VKD3D_DECL_USAGE_TANGENT ] = "TANGENT", ++ [VKD3D_DECL_USAGE_BINORMAL ] = "BINORMAL", ++ [VKD3D_DECL_USAGE_TESS_FACTOR ] = "TESSFACTOR", ++ [VKD3D_DECL_USAGE_POSITIONT ] = "POSITIONT", ++ [VKD3D_DECL_USAGE_COLOR ] = "COLOR", ++ [VKD3D_DECL_USAGE_FOG ] = "FOG", ++ [VKD3D_DECL_USAGE_DEPTH ] = "DEPTH", ++ [VKD3D_DECL_USAGE_SAMPLE ] = "SAMPLE", ++ }; ++ ++ if (reg->type == VKD3DSPR_OUTPUT) ++ output = true; ++ else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE) ++ output = 
false; ++ else /* vpos and vface don't have a semantic. */ ++ return add_signature_element_from_register(sm1, reg, true, mask); ++ ++ /* sm2 pixel shaders use DCL but don't provide a semantic. */ ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && sm1->p.shader_version.major == 2) ++ return add_signature_element_from_register(sm1, reg, true, mask); ++ ++ /* With the exception of vertex POSITION output, none of these are system ++ * values. Pixel POSITION input is not equivalent to SV_Position; the closer ++ * equivalent is VPOS, which is not declared as a semantic. */ ++ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX ++ && output && semantic->usage == VKD3D_DECL_USAGE_POSITION) ++ sysval = VKD3D_SHADER_SV_POSITION; ++ ++ return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage], ++ semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); ++} ++ ++static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, ++ enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def) ++{ ++ struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; ++ ++ desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1); ++ if (from_def) ++ { ++ /* d3d shaders have a maximum of 8192 constants; we should not overrun ++ * this array. 
*/ ++ assert((index / 32) <= ARRAY_SIZE(sm1->constant_def_mask[set])); ++ bitmap_set(sm1->constant_def_mask[set], index); ++ } ++} ++ ++static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, ++ const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def) ++{ ++ struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; ++ uint32_t register_index = reg->idx[0].offset; ++ ++ switch (reg->type) ++ { ++ case VKD3DSPR_TEMP: ++ desc->temp_count = max(desc->temp_count, register_index + 1); ++ break; ++ ++ case VKD3DSPR_CONST: ++ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def); ++ break; ++ ++ case VKD3DSPR_CONST2: ++ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048 + register_index, from_def); ++ break; ++ ++ case VKD3DSPR_CONST3: ++ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096 + register_index, from_def); ++ break; ++ ++ case VKD3DSPR_CONST4: ++ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144 + register_index, from_def); ++ break; ++ ++ case VKD3DSPR_CONSTINT: ++ record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def); ++ break; ++ ++ case VKD3DSPR_CONSTBOOL: ++ record_constant_register(sm1, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, register_index, from_def); ++ break; ++ ++ default: ++ break; ++ } ++ ++ add_signature_element_from_register(sm1, reg, false, mask); ++} ++ + /* Read a parameter token from the input stream, and possibly a relative + * addressing token. 
*/ + static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, +@@ -635,6 +946,8 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, + range = &semantic->resource.range; + range->space = 0; + range->first = range->last = semantic->resource.reg.reg.idx[0].offset; ++ ++ add_signature_element_from_semantic(sm1, semantic); + } + + static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, +@@ -661,6 +974,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const src_param->reg.idx[1].rel_addr = NULL; src_param->reg.idx[2].offset = ~0u; src_param->reg.idx[2].rel_addr = NULL; @@ -2430,7 +3495,7 @@ index ed81137d225..712613ac13b 100644 src_param->reg.immconst_type = type; memcpy(src_param->reg.u.immconst_uint, *ptr, count * sizeof(uint32_t)); src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -@@ -671,7 +677,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const +@@ -671,7 +985,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) { @@ -2439,11 +3504,19 @@ index ed81137d225..712613ac13b 100644 const char *comment; unsigned int size; size_t remaining; -@@ -738,13 +744,12 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, +@@ -738,13 +1052,20 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, } } -static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *ins) ++static unsigned int mask_from_swizzle(unsigned int swizzle) ++{ ++ return (1u << vkd3d_swizzle_get_component(swizzle, 0)) ++ | (1u << vkd3d_swizzle_get_component(swizzle, 1)) ++ | (1u << vkd3d_swizzle_get_component(swizzle, 2)) ++ | (1u << vkd3d_swizzle_get_component(swizzle, 3)); ++} ++ +static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct 
vkd3d_shader_instruction *ins) { - struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); @@ -2455,7 +3528,7 @@ index ed81137d225..712613ac13b 100644 uint32_t opcode_token; const uint32_t *p; bool predicated; -@@ -758,11 +763,11 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru +@@ -758,11 +1079,11 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru goto fail; } @@ -2469,7 +3542,7 @@ index ed81137d225..712613ac13b 100644 "Invalid opcode %#x (token 0x%08x, shader version %u.%u).", opcode_token & VKD3D_SM1_OPCODE_MASK, opcode_token, sm1->p.shader_version.major, sm1->p.shader_version.minor); -@@ -775,14 +780,14 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru +@@ -775,14 +1096,14 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru ins->raw = false; ins->structured = false; predicated = !!(opcode_token & VKD3D_SM1_INSTRUCTION_PREDICATED); @@ -2488,7 +3561,47 @@ index ed81137d225..712613ac13b 100644 goto fail; } -@@ -852,10 +857,9 @@ fail: +@@ -812,22 +1133,28 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru + { + shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_FLOAT); ++ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); + } + else if (ins->handler_idx == VKD3DSIH_DEFB) + { + shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_SCALAR, VKD3D_DATA_UINT); ++ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); + } + else if (ins->handler_idx == VKD3DSIH_DEFI) + { + shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_INT); ++ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); + } + else + { + /* 
Destination token */ + if (ins->dst_count) ++ { + shader_sm1_read_dst_param(sm1, &p, dst_param); ++ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, false); ++ } + + /* Predication token */ + if (ins->predicate) +@@ -835,7 +1162,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru + + /* Other source tokens */ + for (i = 0; i < ins->src_count; ++i) ++ { + shader_sm1_read_src_param(sm1, &p, &src_params[i]); ++ shader_sm1_scan_register(sm1, &src_params[i].reg, mask_from_swizzle(src_params[i].swizzle), false); ++ } + } + + if (sm1->abort) +@@ -852,10 +1182,9 @@ fail: *ptr = sm1->end; } @@ -2501,7 +3614,7 @@ index ed81137d225..712613ac13b 100644 shader_sm1_read_comment(sm1); -@@ -938,7 +942,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, +@@ -938,17 +1267,35 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, shader_desc = &sm1->p.shader_desc; shader_desc->byte_code = code; shader_desc->byte_code_size = code_size; @@ -2510,7 +3623,35 @@ index ed81137d225..712613ac13b 100644 return VKD3D_OK; } -@@ -965,7 +969,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi + ++static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1, ++ enum vkd3d_shader_d3dbc_constant_register set) ++{ ++ unsigned int j; ++ ++ /* Find the highest constant index which is not written by a DEF ++ * instruction. We can't (easily) use an FFZ function for this since it ++ * needs to be limited by the highest used register index. 
*/ ++ for (j = sm1->p.shader_desc.flat_constant_count[set].used; j > 0; --j) ++ { ++ if (!bitmap_is_set(sm1->constant_def_mask[set], j - 1)) ++ return j; ++ } ++ ++ return 0; ++} ++ + int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) + { + struct vkd3d_shader_instruction_array *instructions; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_sm1_parser *sm1; ++ unsigned int i; + int ret; + + if (!(sm1 = vkd3d_calloc(1, sizeof(*sm1)))) +@@ -965,7 +1312,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi } instructions = &sm1->p.instructions; @@ -2519,7 +3660,7 @@ index ed81137d225..712613ac13b 100644 { if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) { -@@ -975,7 +979,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi +@@ -975,7 +1322,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi return VKD3D_ERROR_OUT_OF_MEMORY; } ins = &instructions->elements[instructions->count]; @@ -2528,11 +3669,14 @@ index ed81137d225..712613ac13b 100644 if (ins->handler_idx == VKD3DSIH_INVALID) { -@@ -988,5 +992,1094 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi +@@ -988,5 +1335,1133 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi *parser = &sm1->p; - return VKD3D_OK; ++ for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) ++ sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); ++ + return sm1->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; +} + @@ -2833,17 +3977,12 @@ index ed81137d225..712613ac13b 100644 + + if (var->is_param && var->is_uniform) + { -+ struct vkd3d_string_buffer *name; ++ char *new_name; + -+ if (!(name = hlsl_get_string_buffer(ctx))) -+ { -+ buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; ++ if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name))) + return; -+ } -+ vkd3d_string_buffer_printf(name, "$%s", var->name); + vkd3d_free((char *)var->name); -+ var->name = hlsl_strdup(ctx, name->buffer); -+ hlsl_release_string_buffer(ctx, name); ++ var->name = new_name; + } + } + } @@ -2881,7 +4020,7 @@ index ed81137d225..712613ac13b 100644 + else + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); -+ put_u32(buffer, var->regs[r].bind_count); ++ put_u32(buffer, var->bind_count[r]); + } + put_u32(buffer, 0); /* type */ + put_u32(buffer, 0); /* FIXME: default value */ @@ -3094,12 +4233,13 @@ index ed81137d225..712613ac13b 100644 + + for (i = 0; i < ctx->constant_defs.count; ++i) + { ++ const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; + uint32_t token = D3DSIO_DEF; + const struct sm1_dst_register reg = + { + .type = D3DSPR_CONST, + .writemask = VKD3DSP_WRITEMASK_ALL, -+ .reg = i, ++ .reg = constant_reg->index, + }; + + if (ctx->profile->major_version > 1) @@ -3108,7 +4248,7 @@ index ed81137d225..712613ac13b 100644 + + write_sm1_dst_register(buffer, ®); + for (x = 0; x < 4; ++x) -+ put_f32(buffer, ctx->constant_defs.values[i].f[x]); ++ put_f32(buffer, constant_reg->value.f[x]); + } +} + @@ -3181,10 +4321,6 @@ index ed81137d225..712613ac13b 100644 + + switch (sampler_dim) + { -+ case HLSL_SAMPLER_DIM_1D: -+ res_type = VKD3D_SM1_RESOURCE_TEXTURE_1D; -+ break; -+ + case HLSL_SAMPLER_DIM_2D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D; + break; @@ -3227,14 +4363,19 @@ index ed81137d225..712613ac13b 100644 + if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) + continue; + -+ count = 
var->regs[HLSL_REGSET_SAMPLERS].bind_count; ++ count = var->bind_count[HLSL_REGSET_SAMPLERS]; + + for (i = 0; i < count; ++i) + { + if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + { + sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; -+ assert(sampler_dim != HLSL_SAMPLER_DIM_GENERIC); ++ if (sampler_dim == HLSL_SAMPLER_DIM_GENERIC) ++ { ++ /* These can appear in sm4-style combined sample instructions. */ ++ hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); ++ continue; ++ } + + reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; + write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); @@ -3385,6 +4526,35 @@ index ed81137d225..712613ac13b 100644 + } +} + ++static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++{ ++ const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); ++ ++ switch (jump->type) ++ { ++ case HLSL_IR_JUMP_DISCARD_NEG: ++ { ++ struct hlsl_reg *reg = &jump->condition.node->reg; ++ ++ struct sm1_instruction instr = ++ { ++ .opcode = VKD3D_SM1_OP_TEXKILL, ++ ++ .dst.type = D3DSPR_TEMP, ++ .dst.reg = reg->id, ++ .dst.writemask = reg->writemask, ++ .has_dst = 1, ++ }; ++ ++ write_sm1_instruction(ctx, buffer, &instr); ++ break; ++ } ++ ++ default: ++ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); ++ } ++} ++ +static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_load *load = hlsl_ir_load(instr); @@ -3579,6 +4749,10 @@ index ed81137d225..712613ac13b 100644 + write_sm1_expr(ctx, buffer, instr); + break; + ++ case HLSL_IR_JUMP: ++ write_sm1_jump(ctx, buffer, instr); ++ break; ++ + case HLSL_IR_LOAD: + write_sm1_load(ctx, buffer, instr); + break; @@ -3604,7 +4778,6 @@ index ed81137d225..712613ac13b 100644 +int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code 
*out) +{ + struct vkd3d_bytecode_buffer buffer = {0}; -+ int ret; + + put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + @@ -3617,15 +4790,22 @@ index ed81137d225..712613ac13b 100644 + + put_u32(&buffer, D3DSIO_END); + -+ if (!(ret = buffer.status)) ++ if (buffer.status) ++ ctx->result = buffer.status; ++ ++ if (!ctx->result) + { + out->code = buffer.data; + out->size = buffer.size; + } -+ return ret; ++ else ++ { ++ vkd3d_free(buffer.data); ++ } ++ return ctx->result; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index d99ea2e36b6..3e3f06faeb5 100644 +index d99ea2e36b6..1cb00688c76 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -19,1680 +19,74 @@ @@ -5351,9 +6531,30 @@ index d99ea2e36b6..3e3f06faeb5 100644 } static bool require_space(size_t offset, size_t count, size_t size, size_t data_size) -@@ -1928,12 +322,12 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, +@@ -1927,13 +321,33 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, + return ret; } ++/* Shader Model 6 shaders use these special values in the output signature, ++ * but Shader Model 4/5 just use VKD3D_SHADER_SV_NONE. Normalize to SM6. 
*/ ++static enum vkd3d_shader_sysval_semantic map_fragment_output_sysval(const char *name) ++{ ++ if (!ascii_strcasecmp(name, "sv_target")) ++ return VKD3D_SHADER_SV_TARGET; ++ if (!ascii_strcasecmp(name, "sv_depth")) ++ return VKD3D_SHADER_SV_DEPTH; ++ if (!ascii_strcasecmp(name, "sv_coverage")) ++ return VKD3D_SHADER_SV_COVERAGE; ++ if (!ascii_strcasecmp(name, "sv_depthgreaterequal")) ++ return VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL; ++ if (!ascii_strcasecmp(name, "sv_depthlessequal")) ++ return VKD3D_SHADER_SV_DEPTH_LESS_EQUAL; ++ if (!ascii_strcasecmp(name, "sv_stencilref")) ++ return VKD3D_SHADER_SV_STENCIL_REF; ++ ++ return VKD3D_SHADER_SV_NONE; ++} ++ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *section, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *s) + struct vkd3d_shader_message_context *message_context, struct shader_signature *s) @@ -5366,7 +6567,7 @@ index d99ea2e36b6..3e3f06faeb5 100644 const char *ptr = data; unsigned int i; -@@ -1979,6 +373,8 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s +@@ -1979,6 +393,8 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s { uint32_t name_offset, mask; @@ -5375,15 +6576,26 @@ index d99ea2e36b6..3e3f06faeb5 100644 if (has_stream_index) read_dword(&ptr, &e[i].stream_index); else -@@ -1995,6 +391,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s +@@ -1995,6 +411,8 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s read_dword(&ptr, &e[i].sysval_semantic); read_dword(&ptr, &e[i].component_type); read_dword(&ptr, &e[i].register_index); ++ e[i].target_location = e[i].register_index; + e[i].register_count = 1; read_dword(&ptr, &mask); e[i].mask = mask & 0xff; e[i].used_mask = (mask >> 8) & 0xff; -@@ -2029,7 +426,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s +@@ -2003,6 +421,9 
@@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s + case TAG_OSGN: + case TAG_OSG1: + case TAG_OSG5: ++ if (e[i].sysval_semantic == VKD3D_SHADER_SV_NONE) ++ e[i].sysval_semantic = map_fragment_output_sysval(e[i].semantic_name); ++ /* Fall through. */ + case TAG_PCSG: + case TAG_PSG1: + e[i].used_mask = e[i].mask & ~e[i].used_mask; +@@ -2029,7 +450,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, struct vkd3d_shader_message_context *message_context, void *ctx) { @@ -5392,7 +6604,7 @@ index d99ea2e36b6..3e3f06faeb5 100644 if (section->tag != TAG_ISGN) return VKD3D_OK; -@@ -2037,13 +434,13 @@ static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, +@@ -2037,13 +458,13 @@ static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, if (is->elements) { FIXME("Multiple input signatures.\n"); @@ -5408,7 +6620,33 @@ index d99ea2e36b6..3e3f06faeb5 100644 { int ret; -@@ -2122,12 +519,12 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, +@@ -2096,8 +517,14 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, + return ret; + break; + ++ case TAG_DXIL: + case TAG_SHDR: + case TAG_SHEX: ++ if ((section->tag == TAG_DXIL) != desc->is_dxil) ++ { ++ TRACE("Skipping chunk %#x.\n", section->tag); ++ break; ++ } + if (desc->byte_code) + FIXME("Multiple shader code chunks.\n"); + desc->byte_code = section->data.code; +@@ -2108,10 +535,6 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, + TRACE("Skipping AON9 shader code chunk.\n"); + break; + +- case TAG_DXIL: +- FIXME("Skipping DXIL shader model 6+ code chunk.\n"); +- break; +- + default: + TRACE("Skipping chunk %#x.\n", section->tag); + break; +@@ -2122,22 +545,16 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, void 
free_shader_desc(struct vkd3d_shader_desc *desc) { @@ -5425,7 +6663,17 @@ index d99ea2e36b6..3e3f06faeb5 100644 struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) { int ret; -@@ -2151,66 +548,6 @@ static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, + +- desc->byte_code = NULL; +- desc->byte_code_size = 0; +- memset(&desc->input_signature, 0, sizeof(desc->input_signature)); +- memset(&desc->output_signature, 0, sizeof(desc->output_signature)); +- memset(&desc->patch_constant_signature, 0, sizeof(desc->patch_constant_signature)); +- + ret = for_each_dxbc_section(dxbc, message_context, source_name, shdr_handler, desc); + if (!desc->byte_code) + ret = VKD3D_ERROR_INVALID_ARGUMENT; +@@ -2151,66 +568,6 @@ static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, return ret; } @@ -5492,7 +6740,7 @@ index d99ea2e36b6..3e3f06faeb5 100644 /* root signatures */ #define VKD3D_ROOT_SIGNATURE_1_0_ROOT_DESCRIPTOR_FLAGS VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE -@@ -2862,7 +1199,7 @@ static int shader_write_root_parameters(struct root_signature_writer_context *co +@@ -2862,7 +1219,7 @@ static int shader_write_root_parameters(struct root_signature_writer_context *co size_t parameters_position; unsigned int i; @@ -5501,11 +6749,3013 @@ index d99ea2e36b6..3e3f06faeb5 100644 for (i = 0; i < parameter_count; ++i) { put_u32(buffer, versioned_root_signature_get_parameter_type(desc, i)); +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +new file mode 100644 +index 00000000000..b78c78d34a7 +--- /dev/null ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -0,0 +1,2968 @@ ++/* ++ * Copyright 2023 Conor McCarthy for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the 
License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#include "vkd3d_shader_private.h" ++ ++#define VKD3D_SM6_VERSION_MAJOR(version) (((version) >> 4) & 0xf) ++#define VKD3D_SM6_VERSION_MINOR(version) (((version) >> 0) & 0xf) ++ ++#define BITCODE_MAGIC VKD3D_MAKE_TAG('B', 'C', 0xc0, 0xde) ++#define DXIL_OP_MAX_OPERANDS 17 ++ ++enum bitcode_block_id ++{ ++ BLOCKINFO_BLOCK = 0, ++ MODULE_BLOCK = 8, ++ PARAMATTR_BLOCK = 9, ++ PARAMATTR_GROUP_BLOCK = 10, ++ CONSTANTS_BLOCK = 11, ++ FUNCTION_BLOCK = 12, ++ VALUE_SYMTAB_BLOCK = 14, ++ METADATA_BLOCK = 15, ++ METADATA_ATTACHMENT_BLOCK = 16, ++ TYPE_BLOCK = 17, ++ USELIST_BLOCK = 18, ++}; ++ ++enum bitcode_blockinfo_code ++{ ++ SETBID = 1, ++ BLOCKNAME = 2, ++ SETRECORDNAME = 3, ++}; ++ ++enum bitcode_block_abbreviation ++{ ++ END_BLOCK = 0, ++ ENTER_SUBBLOCK = 1, ++ DEFINE_ABBREV = 2, ++ UNABBREV_RECORD = 3, ++}; ++ ++enum bitcode_abbrev_type ++{ ++ ABBREV_FIXED = 1, ++ ABBREV_VBR = 2, ++ ABBREV_ARRAY = 3, ++ ABBREV_CHAR = 4, ++ ABBREV_BLOB = 5, ++}; ++ ++enum bitcode_address_space ++{ ++ ADDRESS_SPACE_DEFAULT, ++ ADDRESS_SPACE_DEVICEMEM, ++ ADDRESS_SPACE_CBUFFER, ++ ADDRESS_SPACE_GROUPSHARED, ++}; ++ ++enum bitcode_module_code ++{ ++ MODULE_CODE_VERSION = 1, ++ MODULE_CODE_GLOBALVAR = 7, ++ MODULE_CODE_FUNCTION = 8, ++}; ++ ++enum bitcode_constant_code ++{ ++ CST_CODE_SETTYPE = 1, ++ CST_CODE_NULL = 2, ++ CST_CODE_UNDEF = 3, ++ CST_CODE_INTEGER = 4, ++ CST_CODE_FLOAT = 6, ++ CST_CODE_STRING = 8, ++ CST_CODE_CE_GEP = 12, ++ 
CST_CODE_CE_INBOUNDS_GEP = 20, ++ CST_CODE_DATA = 22, ++}; ++ ++enum bitcode_function_code ++{ ++ FUNC_CODE_DECLAREBLOCKS = 1, ++ FUNC_CODE_INST_BINOP = 2, ++ FUNC_CODE_INST_CAST = 3, ++ FUNC_CODE_INST_RET = 10, ++ FUNC_CODE_INST_BR = 11, ++ FUNC_CODE_INST_SWITCH = 12, ++ FUNC_CODE_INST_PHI = 16, ++ FUNC_CODE_INST_ALLOCA = 19, ++ FUNC_CODE_INST_LOAD = 20, ++ FUNC_CODE_INST_EXTRACTVAL = 26, ++ FUNC_CODE_INST_CMP2 = 28, ++ FUNC_CODE_INST_VSELECT = 29, ++ FUNC_CODE_INST_CALL = 34, ++ FUNC_CODE_INST_ATOMICRMW = 38, ++ FUNC_CODE_INST_LOADATOMIC = 41, ++ FUNC_CODE_INST_GEP = 43, ++ FUNC_CODE_INST_STORE = 44, ++ FUNC_CODE_INST_STOREATOMIC = 45, ++ FUNC_CODE_INST_CMPXCHG = 46, ++}; ++ ++enum bitcode_type_code ++{ ++ TYPE_CODE_NUMENTRY = 1, ++ TYPE_CODE_VOID = 2, ++ TYPE_CODE_FLOAT = 3, ++ TYPE_CODE_DOUBLE = 4, ++ TYPE_CODE_LABEL = 5, ++ TYPE_CODE_INTEGER = 7, ++ TYPE_CODE_POINTER = 8, ++ TYPE_CODE_HALF = 10, ++ TYPE_CODE_ARRAY = 11, ++ TYPE_CODE_VECTOR = 12, ++ TYPE_CODE_METADATA = 16, ++ TYPE_CODE_STRUCT_ANON = 18, ++ TYPE_CODE_STRUCT_NAME = 19, ++ TYPE_CODE_STRUCT_NAMED = 20, ++ TYPE_CODE_FUNCTION = 21, ++}; ++ ++enum bitcode_value_symtab_code ++{ ++ VST_CODE_ENTRY = 1, ++ VST_CODE_BBENTRY = 2, ++}; ++ ++enum dx_intrinsic_opcode ++{ ++ DX_STORE_OUTPUT = 5, ++}; ++ ++struct sm6_pointer_info ++{ ++ const struct sm6_type *type; ++ enum bitcode_address_space addr_space; ++}; ++ ++struct sm6_struct_info ++{ ++ const char *name; ++ unsigned int elem_count; ++ const struct sm6_type *elem_types[]; ++}; ++ ++struct sm6_function_info ++{ ++ const struct sm6_type *ret_type; ++ unsigned int param_count; ++ const struct sm6_type *param_types[]; ++}; ++ ++struct sm6_array_info ++{ ++ unsigned int count; ++ const struct sm6_type *elem_type; ++}; ++ ++enum sm6_type_class ++{ ++ TYPE_CLASS_VOID, ++ TYPE_CLASS_INTEGER, ++ TYPE_CLASS_FLOAT, ++ TYPE_CLASS_POINTER, ++ TYPE_CLASS_STRUCT, ++ TYPE_CLASS_FUNCTION, ++ TYPE_CLASS_VECTOR, ++ TYPE_CLASS_ARRAY, ++ TYPE_CLASS_LABEL, ++ 
TYPE_CLASS_METADATA, ++}; ++ ++struct sm6_type ++{ ++ enum sm6_type_class class; ++ union ++ { ++ unsigned int width; ++ struct sm6_pointer_info pointer; ++ struct sm6_struct_info *struc; ++ struct sm6_function_info *function; ++ struct sm6_array_info array; ++ } u; ++}; ++ ++enum sm6_value_type ++{ ++ VALUE_TYPE_FUNCTION, ++ VALUE_TYPE_REG, ++}; ++ ++struct sm6_function_data ++{ ++ const char *name; ++ bool is_prototype; ++ unsigned int attribs_id; ++}; ++ ++struct sm6_value ++{ ++ const struct sm6_type *type; ++ enum sm6_value_type value_type; ++ bool is_undefined; ++ union ++ { ++ struct sm6_function_data function; ++ struct vkd3d_shader_register reg; ++ } u; ++}; ++ ++struct dxil_record ++{ ++ unsigned int code; ++ unsigned int operand_count; ++ uint64_t operands[]; ++}; ++ ++struct sm6_symbol ++{ ++ unsigned int id; ++ const char *name; ++}; ++ ++struct sm6_block ++{ ++ struct vkd3d_shader_instruction *instructions; ++ size_t instruction_capacity; ++ size_t instruction_count; ++}; ++ ++struct sm6_function ++{ ++ const struct sm6_value *declaration; ++ ++ struct sm6_block *blocks[1]; ++ size_t block_count; ++ ++ size_t value_count; ++}; ++ ++struct dxil_block ++{ ++ const struct dxil_block *parent; ++ enum bitcode_block_id id; ++ unsigned int abbrev_len; ++ unsigned int start; ++ unsigned int length; ++ unsigned int level; ++ ++ /* The abbrev, block and record structs are not relocatable. 
*/ ++ struct dxil_abbrev **abbrevs; ++ size_t abbrev_capacity; ++ size_t abbrev_count; ++ unsigned int blockinfo_bid; ++ bool has_bid; ++ ++ struct dxil_block **child_blocks; ++ size_t child_block_capacity; ++ size_t child_block_count; ++ ++ struct dxil_record **records; ++ size_t record_capacity; ++ size_t record_count; ++}; ++ ++struct sm6_parser ++{ ++ const uint32_t *ptr, *start, *end; ++ unsigned int bitpos; ++ ++ struct dxil_block root_block; ++ struct dxil_block *current_block; ++ ++ struct dxil_global_abbrev **abbrevs; ++ size_t abbrev_capacity; ++ size_t abbrev_count; ++ ++ struct sm6_type *types; ++ size_t type_count; ++ ++ struct sm6_symbol *global_symbols; ++ size_t global_symbol_count; ++ ++ struct vkd3d_shader_dst_param *output_params; ++ ++ struct sm6_function *functions; ++ size_t function_count; ++ ++ struct sm6_value *values; ++ size_t value_count; ++ size_t value_capacity; ++ size_t cur_max_value; ++ ++ struct vkd3d_shader_parser p; ++}; ++ ++struct dxil_abbrev_operand ++{ ++ uint64_t context; ++ bool (*read_operand)(struct sm6_parser *sm6, uint64_t context, uint64_t *operand); ++}; ++ ++struct dxil_abbrev ++{ ++ unsigned int count; ++ bool is_array; ++ struct dxil_abbrev_operand operands[]; ++}; ++ ++struct dxil_global_abbrev ++{ ++ unsigned int block_id; ++ struct dxil_abbrev abbrev; ++}; ++ ++static const uint64_t CALL_CONV_FLAG_EXPLICIT_TYPE = 1ull << 15; ++ ++static size_t size_add_with_overflow_check(size_t a, size_t b) ++{ ++ size_t i = a + b; ++ return (i < a) ? 
SIZE_MAX : i; ++} ++ ++static struct sm6_parser *sm6_parser(struct vkd3d_shader_parser *parser) ++{ ++ return CONTAINING_RECORD(parser, struct sm6_parser, p); ++} ++ ++static bool sm6_parser_is_end(struct sm6_parser *sm6) ++{ ++ return sm6->ptr == sm6->end; ++} ++ ++static uint32_t sm6_parser_read_uint32(struct sm6_parser *sm6) ++{ ++ if (sm6_parser_is_end(sm6)) ++ { ++ sm6->p.failed = true; ++ return 0; ++ } ++ return *sm6->ptr++; ++} ++ ++static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length) ++{ ++ unsigned int l, prev_len = 0; ++ uint32_t bits; ++ ++ if (!length) ++ return 0; ++ ++ assert(length < 32); ++ ++ if (sm6_parser_is_end(sm6)) ++ { ++ sm6->p.failed = true; ++ return 0; ++ } ++ ++ assert(sm6->bitpos < 32); ++ bits = *sm6->ptr >> sm6->bitpos; ++ l = 32 - sm6->bitpos; ++ if (l <= length) ++ { ++ ++sm6->ptr; ++ if (sm6_parser_is_end(sm6) && l < length) ++ { ++ sm6->p.failed = true; ++ return bits; ++ } ++ sm6->bitpos = 0; ++ bits |= *sm6->ptr << l; ++ prev_len = l; ++ } ++ sm6->bitpos += length - prev_len; ++ ++ return bits & ((1 << length) - 1); ++} ++ ++static uint64_t sm6_parser_read_vbr(struct sm6_parser *sm6, unsigned int length) ++{ ++ unsigned int bits, flag, mask, shift = 0; ++ uint64_t result = 0; ++ ++ if (!length) ++ return 0; ++ ++ if (sm6_parser_is_end(sm6)) ++ { ++ sm6->p.failed = true; ++ return 0; ++ } ++ ++ flag = 1 << (length - 1); ++ mask = flag - 1; ++ do ++ { ++ bits = sm6_parser_read_bits(sm6, length); ++ result |= (uint64_t)(bits & mask) << shift; ++ shift += length - 1; ++ } while ((bits & flag) && !sm6->p.failed && shift < 64); ++ ++ sm6->p.failed |= !!(bits & flag); ++ ++ return result; ++} ++ ++static void sm6_parser_align_32(struct sm6_parser *sm6) ++{ ++ if (!sm6->bitpos) ++ return; ++ ++ if (sm6_parser_is_end(sm6)) ++ { ++ sm6->p.failed = true; ++ return; ++ } ++ ++ ++sm6->ptr; ++ sm6->bitpos = 0; ++} ++ ++static bool dxil_block_handle_blockinfo_record(struct dxil_block *block, struct dxil_record 
*record) ++{ ++ /* BLOCKINFO blocks must only occur immediately below the module root block. */ ++ if (block->level > 1) ++ { ++ WARN("Invalid blockinfo block level %u.\n", block->level); ++ return false; ++ } ++ ++ switch (record->code) ++ { ++ case SETBID: ++ if (!record->operand_count) ++ { ++ WARN("Missing id operand.\n"); ++ return false; ++ } ++ if (record->operands[0] > UINT_MAX) ++ WARN("Truncating block id %"PRIu64".\n", record->operands[0]); ++ block->blockinfo_bid = record->operands[0]; ++ block->has_bid = true; ++ break; ++ case BLOCKNAME: ++ case SETRECORDNAME: ++ break; ++ default: ++ FIXME("Unhandled BLOCKINFO record type %u.\n", record->code); ++ break; ++ } ++ ++ return true; ++} ++ ++static enum vkd3d_result dxil_block_add_record(struct dxil_block *block, struct dxil_record *record) ++{ ++ unsigned int reserve; ++ ++ switch (block->id) ++ { ++ /* Rough initial reserve sizes for small shaders. */ ++ case CONSTANTS_BLOCK: reserve = 32; break; ++ case FUNCTION_BLOCK: reserve = 128; break; ++ case METADATA_BLOCK: reserve = 32; break; ++ case TYPE_BLOCK: reserve = 32; break; ++ default: reserve = 8; break; ++ } ++ reserve = max(reserve, block->record_count + 1); ++ if (!vkd3d_array_reserve((void **)&block->records, &block->record_capacity, reserve, sizeof(*block->records))) ++ { ++ ERR("Failed to allocate %u records.\n", reserve); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if (block->id == BLOCKINFO_BLOCK && !dxil_block_handle_blockinfo_record(block, record)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ block->records[block->record_count++] = record; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result sm6_parser_read_unabbrev_record(struct sm6_parser *sm6) ++{ ++ struct dxil_block *block = sm6->current_block; ++ enum vkd3d_result ret = VKD3D_OK; ++ unsigned int code, count, i; ++ struct dxil_record *record; ++ ++ code = sm6_parser_read_vbr(sm6, 6); ++ ++ count = sm6_parser_read_vbr(sm6, 6); ++ if (!(record = vkd3d_malloc(sizeof(*record) + 
count * sizeof(record->operands[0])))) ++ { ++ ERR("Failed to allocate record with %u operands.\n", count); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ record->code = code; ++ record->operand_count = count; ++ ++ for (i = 0; i < count; ++i) ++ record->operands[i] = sm6_parser_read_vbr(sm6, 6); ++ if (sm6->p.failed) ++ ret = VKD3D_ERROR_INVALID_SHADER; ++ ++ if (ret < 0 || (ret = dxil_block_add_record(block, record)) < 0) ++ vkd3d_free(record); ++ ++ return ret; ++} ++ ++static bool sm6_parser_read_literal_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) ++{ ++ *op = context; ++ return !sm6->p.failed; ++} ++ ++static bool sm6_parser_read_fixed_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) ++{ ++ *op = sm6_parser_read_bits(sm6, context); ++ return !sm6->p.failed; ++} ++ ++static bool sm6_parser_read_vbr_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) ++{ ++ *op = sm6_parser_read_vbr(sm6, context); ++ return !sm6->p.failed; ++} ++ ++static bool sm6_parser_read_char6_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) ++{ ++ *op = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"[sm6_parser_read_bits(sm6, 6)]; ++ return !sm6->p.failed; ++} ++ ++static bool sm6_parser_read_blob_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) ++{ ++ int count = sm6_parser_read_vbr(sm6, 6); ++ sm6_parser_align_32(sm6); ++ for (; count > 0; count -= 4) ++ sm6_parser_read_uint32(sm6); ++ FIXME("Unhandled blob operand.\n"); ++ return false; ++} ++ ++static enum vkd3d_result dxil_abbrev_init(struct dxil_abbrev *abbrev, unsigned int count, struct sm6_parser *sm6) ++{ ++ enum bitcode_abbrev_type prev_type, type; ++ unsigned int i; ++ ++ abbrev->is_array = false; ++ ++ for (i = 0, prev_type = 0; i < count && !sm6->p.failed; ++i) ++ { ++ if (sm6_parser_read_bits(sm6, 1)) ++ { ++ if (prev_type == ABBREV_ARRAY) ++ { ++ WARN("Unexpected literal abbreviation after array.\n"); ++ return 
VKD3D_ERROR_INVALID_SHADER; ++ } ++ abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 8); ++ abbrev->operands[i].read_operand = sm6_parser_read_literal_operand; ++ continue; ++ } ++ ++ switch (type = sm6_parser_read_bits(sm6, 3)) ++ { ++ case ABBREV_FIXED: ++ case ABBREV_VBR: ++ abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 5); ++ abbrev->operands[i].read_operand = (type == ABBREV_FIXED) ? sm6_parser_read_fixed_operand ++ : sm6_parser_read_vbr_operand; ++ break; ++ ++ case ABBREV_ARRAY: ++ if (prev_type == ABBREV_ARRAY || i != count - 2) ++ { ++ WARN("Unexpected array abbreviation.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ abbrev->is_array = true; ++ --i; ++ --count; ++ break; ++ ++ case ABBREV_CHAR: ++ abbrev->operands[i].read_operand = sm6_parser_read_char6_operand; ++ break; ++ ++ case ABBREV_BLOB: ++ if (prev_type == ABBREV_ARRAY || i != count - 1) ++ { ++ WARN("Unexpected blob abbreviation.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ abbrev->operands[i].read_operand = sm6_parser_read_blob_operand; ++ break; ++ } ++ ++ prev_type = type; ++ } ++ ++ abbrev->count = count; ++ ++ return sm6->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; ++} ++ ++static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6) ++{ ++ struct dxil_block *block = sm6->current_block; ++ unsigned int count = sm6_parser_read_vbr(sm6, 5); ++ struct dxil_global_abbrev *global_abbrev; ++ enum vkd3d_result ret; ++ ++ assert(block->id == BLOCKINFO_BLOCK); ++ ++ if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs)) ++ || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0])))) ++ { ++ ERR("Failed to allocate global abbreviation.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if ((ret = dxil_abbrev_init(&global_abbrev->abbrev, count, sm6)) < 0) ++ { ++ vkd3d_free(global_abbrev); ++ return ret; ++ } ++ ++ if (!block->has_bid) ++ { ++ WARN("Missing blockinfo block id.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (block->blockinfo_bid == MODULE_BLOCK) ++ { ++ FIXME("Unhandled global abbreviation for module block.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ global_abbrev->block_id = block->blockinfo_bid; ++ ++ sm6->abbrevs[sm6->abbrev_count++] = global_abbrev; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result sm6_parser_add_block_abbrev(struct sm6_parser *sm6) ++{ ++ struct dxil_block *block = sm6->current_block; ++ struct dxil_abbrev *abbrev; ++ enum vkd3d_result ret; ++ unsigned int count; ++ ++ if (block->id == BLOCKINFO_BLOCK) ++ return sm6_parser_add_global_abbrev(sm6); ++ ++ count = sm6_parser_read_vbr(sm6, 5); ++ if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, block->abbrev_count + 1, sizeof(*block->abbrevs)) ++ || !(abbrev = vkd3d_malloc(sizeof(*abbrev) + count * sizeof(abbrev->operands[0])))) ++ { ++ ERR("Failed to allocate block abbreviation.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if ((ret = dxil_abbrev_init(abbrev, count, sm6)) < 0) ++ { ++ vkd3d_free(abbrev); ++ return ret; ++ } ++ ++ 
block->abbrevs[block->abbrev_count++] = abbrev; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result sm6_parser_read_abbrev_record(struct sm6_parser *sm6, unsigned int abbrev_id) ++{ ++ enum vkd3d_result ret = VKD3D_ERROR_INVALID_SHADER; ++ struct dxil_block *block = sm6->current_block; ++ struct dxil_record *temp, *record; ++ unsigned int i, count, array_len; ++ struct dxil_abbrev *abbrev; ++ uint64_t code; ++ ++ if (abbrev_id >= block->abbrev_count) ++ { ++ WARN("Invalid abbreviation id %u.\n", abbrev_id); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ abbrev = block->abbrevs[abbrev_id]; ++ if (!(count = abbrev->count)) ++ return VKD3D_OK; ++ if (count == 1 && abbrev->is_array) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ /* First operand is the record code. The array is included in the count, but will be done separately. */ ++ count -= abbrev->is_array + 1; ++ if (!(record = vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) ++ { ++ ERR("Failed to allocate record with %u operands.\n", count); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if (!abbrev->operands[0].read_operand(sm6, abbrev->operands[0].context, &code)) ++ goto fail; ++ if (code > UINT_MAX) ++ FIXME("Truncating 64-bit record code %#"PRIx64".\n", code); ++ record->code = code; ++ ++ for (i = 0; i < count; ++i) ++ if (!abbrev->operands[i + 1].read_operand(sm6, abbrev->operands[i + 1].context, &record->operands[i])) ++ goto fail; ++ record->operand_count = count; ++ ++ /* An array can occur only as the last operand. 
*/ ++ if (abbrev->is_array) ++ { ++ array_len = sm6_parser_read_vbr(sm6, 6); ++ if (!(temp = vkd3d_realloc(record, sizeof(*record) + (count + array_len) * sizeof(record->operands[0])))) ++ { ++ ERR("Failed to allocate record with %u operands.\n", count + array_len); ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; ++ goto fail; ++ } ++ record = temp; ++ ++ for (i = 0; i < array_len; ++i) ++ { ++ if (!abbrev->operands[count + 1].read_operand(sm6, abbrev->operands[count + 1].context, ++ &record->operands[count + i])) ++ { ++ goto fail; ++ } ++ } ++ record->operand_count += array_len; ++ } ++ ++ if ((ret = dxil_block_add_record(block, record)) < 0) ++ goto fail; ++ ++ return VKD3D_OK; ++ ++fail: ++ vkd3d_free(record); ++ return ret; ++} ++ ++static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, ++ struct sm6_parser *sm6); ++ ++static enum vkd3d_result dxil_block_read(struct dxil_block *parent, struct sm6_parser *sm6) ++{ ++ unsigned int reserve = (parent->id == MODULE_BLOCK) ? 
12 : 2; ++ struct dxil_block *block; ++ enum vkd3d_result ret; ++ ++ sm6->current_block = parent; ++ ++ do ++ { ++ unsigned int abbrev_id = sm6_parser_read_bits(sm6, parent->abbrev_len); ++ ++ switch (abbrev_id) ++ { ++ case END_BLOCK: ++ sm6_parser_align_32(sm6); ++ return VKD3D_OK; ++ ++ case ENTER_SUBBLOCK: ++ if (parent->id != MODULE_BLOCK && parent->id != FUNCTION_BLOCK) ++ { ++ WARN("Invalid subblock parent id %u.\n", parent->id); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!vkd3d_array_reserve((void **)&parent->child_blocks, &parent->child_block_capacity, ++ max(reserve, parent->child_block_count + 1), sizeof(*parent->child_blocks)) ++ || !(block = vkd3d_calloc(1, sizeof(*block)))) ++ { ++ ERR("Failed to allocate block.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if ((ret = dxil_block_init(block, parent, sm6)) < 0) ++ { ++ vkd3d_free(block); ++ return ret; ++ } ++ ++ parent->child_blocks[parent->child_block_count++] = block; ++ sm6->current_block = parent; ++ break; ++ ++ case DEFINE_ABBREV: ++ if ((ret = sm6_parser_add_block_abbrev(sm6)) < 0) ++ return ret; ++ break; ++ ++ case UNABBREV_RECORD: ++ if ((ret = sm6_parser_read_unabbrev_record(sm6)) < 0) ++ { ++ WARN("Failed to read unabbreviated record.\n"); ++ return ret; ++ } ++ break; ++ ++ default: ++ if ((ret = sm6_parser_read_abbrev_record(sm6, abbrev_id - 4)) < 0) ++ { ++ WARN("Failed to read abbreviated record.\n"); ++ return ret; ++ } ++ break; ++ } ++ } while (!sm6->p.failed); ++ ++ return VKD3D_ERROR_INVALID_SHADER; ++} ++ ++static size_t sm6_parser_compute_global_abbrev_count_for_block_id(struct sm6_parser *sm6, ++ unsigned int block_id) ++{ ++ size_t i, count; ++ ++ for (i = 0, count = 0; i < sm6->abbrev_count; ++i) ++ count += sm6->abbrevs[i]->block_id == block_id; ++ ++ return count; ++} ++ ++static void dxil_block_destroy(struct dxil_block *block) ++{ ++ size_t i; ++ ++ for (i = 0; i < block->record_count; ++i) ++ vkd3d_free(block->records[i]); ++ vkd3d_free(block->records); 
++ ++ for (i = 0; i < block->child_block_count; ++i) ++ { ++ dxil_block_destroy(block->child_blocks[i]); ++ vkd3d_free(block->child_blocks[i]); ++ } ++ vkd3d_free(block->child_blocks); ++ ++ block->records = NULL; ++ block->record_count = 0; ++ block->child_blocks = NULL; ++ block->child_block_count = 0; ++} ++ ++static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, ++ struct sm6_parser *sm6) ++{ ++ size_t i, abbrev_count = 0; ++ enum vkd3d_result ret; ++ ++ block->parent = parent; ++ block->level = parent ? parent->level + 1 : 0; ++ block->id = sm6_parser_read_vbr(sm6, 8); ++ block->abbrev_len = sm6_parser_read_vbr(sm6, 4); ++ sm6_parser_align_32(sm6); ++ block->length = sm6_parser_read_uint32(sm6); ++ block->start = sm6->ptr - sm6->start; ++ ++ if (sm6->p.failed) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if ((block->abbrev_count = sm6_parser_compute_global_abbrev_count_for_block_id(sm6, block->id))) ++ { ++ if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, ++ block->abbrev_count, sizeof(*block->abbrevs))) ++ { ++ ERR("Failed to allocate block abbreviations.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ for (i = 0; i < sm6->abbrev_count; ++i) ++ if (sm6->abbrevs[i]->block_id == block->id) ++ block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; ++ ++ assert(abbrev_count == block->abbrev_count); ++ } ++ ++ if ((ret = dxil_block_read(block, sm6)) < 0) ++ dxil_block_destroy(block); ++ ++ for (i = abbrev_count; i < block->abbrev_count; ++i) ++ vkd3d_free(block->abbrevs[i]); ++ vkd3d_free(block->abbrevs); ++ block->abbrevs = NULL; ++ block->abbrev_count = 0; ++ ++ return ret; ++} ++ ++static size_t dxil_block_compute_function_count(const struct dxil_block *root) ++{ ++ size_t i, count; ++ ++ for (i = 0, count = 0; i < root->child_block_count; ++i) ++ count += root->child_blocks[i]->id == FUNCTION_BLOCK; ++ ++ return count; ++} ++ ++static size_t 
dxil_block_compute_module_decl_count(const struct dxil_block *block) ++{ ++ size_t i, count; ++ ++ for (i = 0, count = 0; i < block->record_count; ++i) ++ count += block->records[i]->code == MODULE_CODE_FUNCTION; ++ return count; ++} ++ ++static size_t dxil_block_compute_constants_count(const struct dxil_block *block) ++{ ++ size_t i, count; ++ ++ for (i = 0, count = 0; i < block->record_count; ++i) ++ count += block->records[i]->code != CST_CODE_SETTYPE; ++ return count; ++} ++ ++static void dxil_global_abbrevs_cleanup(struct dxil_global_abbrev **abbrevs, size_t count) ++{ ++ size_t i; ++ ++ for (i = 0; i < count; ++i) ++ vkd3d_free(abbrevs[i]); ++ vkd3d_free(abbrevs); ++} ++ ++static const struct dxil_block *sm6_parser_get_level_one_block(const struct sm6_parser *sm6, ++ enum bitcode_block_id id, bool *is_unique) ++{ ++ const struct dxil_block *block, *found = NULL; ++ size_t i; ++ ++ for (i = 0, *is_unique = true; i < sm6->root_block.child_block_count; ++i) ++ { ++ block = sm6->root_block.child_blocks[i]; ++ if (block->id != id) ++ continue; ++ ++ if (!found) ++ found = block; ++ else ++ *is_unique = false; ++ } ++ ++ return found; ++} ++ ++static char *dxil_record_to_string(const struct dxil_record *record, unsigned int offset) ++{ ++ unsigned int i; ++ char *str; ++ ++ assert(offset <= record->operand_count); ++ if (!(str = vkd3d_calloc(record->operand_count - offset + 1, 1))) ++ return NULL; ++ ++ for (i = offset; i < record->operand_count; ++i) ++ str[i - offset] = record->operands[i]; ++ ++ return str; ++} ++ ++static bool dxil_record_validate_operand_min_count(const struct dxil_record *record, unsigned int min_count, ++ struct sm6_parser *sm6) ++{ ++ if (record->operand_count >= min_count) ++ return true; ++ ++ WARN("Invalid operand count %u for code %u.\n", record->operand_count, record->code); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, ++ "Invalid operand count %u for record code %u.", record->operand_count, 
record->code); ++ return false; ++} ++ ++static void dxil_record_validate_operand_max_count(const struct dxil_record *record, unsigned int max_count, ++ struct sm6_parser *sm6) ++{ ++ if (record->operand_count <= max_count) ++ return; ++ ++ WARN("Ignoring %u extra operands for code %u.\n", record->operand_count - max_count, record->code); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ "Ignoring %u extra operands for record code %u.", record->operand_count - max_count, record->code); ++} ++ ++static bool dxil_record_validate_operand_count(const struct dxil_record *record, unsigned int min_count, ++ unsigned int max_count, struct sm6_parser *sm6) ++{ ++ dxil_record_validate_operand_max_count(record, max_count, sm6); ++ return dxil_record_validate_operand_min_count(record, min_count, sm6); ++} ++ ++static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) ++{ ++ const struct dxil_record *record; ++ size_t i, type_count, type_index; ++ const struct dxil_block *block; ++ char *struct_name = NULL; ++ unsigned int j, count; ++ struct sm6_type *type; ++ uint64_t type_id; ++ bool is_unique; ++ ++ sm6->p.location.line = 0; ++ sm6->p.location.column = 0; ++ ++ if (!(block = sm6_parser_get_level_one_block(sm6, TYPE_BLOCK, &is_unique))) ++ { ++ WARN("No type definitions found.\n"); ++ return VKD3D_OK; ++ } ++ if (!is_unique) ++ WARN("Ignoring invalid extra type table(s).\n"); ++ ++ sm6->p.location.line = block->id; ++ ++ type_count = 0; ++ for (i = 0; i < block->record_count; ++i) ++ type_count += block->records[i]->code != TYPE_CODE_NUMENTRY && block->records[i]->code != TYPE_CODE_STRUCT_NAME; ++ ++ /* The type array must not be relocated. 
*/ ++ if (!(sm6->types = vkd3d_calloc(type_count, sizeof(*sm6->types)))) ++ { ++ ERR("Failed to allocate type array.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ for (i = 0; i < block->record_count; ++i) ++ { ++ sm6->p.location.column = i; ++ record = block->records[i]; ++ ++ type = &sm6->types[sm6->type_count]; ++ type_index = sm6->type_count; ++ ++ switch (record->code) ++ { ++ case TYPE_CODE_ARRAY: ++ case TYPE_CODE_VECTOR: ++ if (!dxil_record_validate_operand_count(record, 2, 2, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ type->class = record->code == TYPE_CODE_ARRAY ? TYPE_CLASS_ARRAY : TYPE_CLASS_VECTOR; ++ ++ if (!(type->u.array.count = record->operands[0])) ++ { ++ TRACE("Setting unbounded for type %zu.\n", type_index); ++ type->u.array.count = UINT_MAX; ++ } ++ ++ if ((type_id = record->operands[1]) >= type_count) ++ { ++ WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ type->u.array.elem_type = &sm6->types[type_id]; ++ break; ++ ++ case TYPE_CODE_DOUBLE: ++ dxil_record_validate_operand_max_count(record, 0, sm6); ++ type->class = TYPE_CLASS_FLOAT; ++ type->u.width = 64; ++ break; ++ ++ case TYPE_CODE_FLOAT: ++ dxil_record_validate_operand_max_count(record, 0, sm6); ++ type->class = TYPE_CLASS_FLOAT; ++ type->u.width = 32; ++ break; ++ ++ case TYPE_CODE_FUNCTION: ++ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ if (record->operands[0]) ++ FIXME("Unhandled vararg function type %zu.\n", type_index); ++ ++ type->class = TYPE_CLASS_FUNCTION; ++ ++ if ((type_id = record->operands[1]) >= type_count) ++ { ++ WARN("Invalid return type id %"PRIu64" for type %zu.\n", type_id, type_index); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ count = record->operand_count - 2; ++ if (vkd3d_object_range_overflow(sizeof(type->u.function), count, sizeof(type->u.function->param_types[0])) ++ || !(type->u.function = 
vkd3d_malloc(offsetof(struct sm6_function_info, param_types[count])))) ++ { ++ ERR("Failed to allocate function parameter types.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ type->u.function->ret_type = &sm6->types[type_id]; ++ type->u.function->param_count = count; ++ for (j = 0; j < count; ++j) ++ { ++ if ((type_id = record->operands[j + 2]) >= type_count) ++ { ++ WARN("Invalid parameter type id %"PRIu64" for type %zu.\n", type_id, type_index); ++ vkd3d_free(type->u.function); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ type->u.function->param_types[j] = &sm6->types[type_id]; ++ } ++ break; ++ ++ case TYPE_CODE_HALF: ++ dxil_record_validate_operand_max_count(record, 0, sm6); ++ type->class = TYPE_CLASS_FLOAT; ++ type->u.width = 16; ++ break; ++ ++ case TYPE_CODE_INTEGER: ++ { ++ uint64_t width; ++ ++ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ type->class = TYPE_CLASS_INTEGER; ++ ++ switch ((width = record->operands[0])) ++ { ++ case 1: ++ case 8: ++ case 16: ++ case 32: ++ case 64: ++ break; ++ default: ++ WARN("Invalid integer width %"PRIu64" for type %zu.\n", width, type_index); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ type->u.width = width; ++ break; ++ } ++ ++ case TYPE_CODE_LABEL: ++ type->class = TYPE_CLASS_LABEL; ++ break; ++ ++ case TYPE_CODE_METADATA: ++ type->class = TYPE_CLASS_METADATA; ++ break; ++ ++ case TYPE_CODE_NUMENTRY: ++ continue; ++ ++ case TYPE_CODE_POINTER: ++ if (!dxil_record_validate_operand_count(record, 1, 2, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ type->class = TYPE_CLASS_POINTER; ++ ++ if ((type_id = record->operands[0]) >= type_count) ++ { ++ WARN("Invalid pointee type id %"PRIu64" for type %zu.\n", type_id, type_index); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ type->u.pointer.type = &sm6->types[type_id]; ++ type->u.pointer.addr_space = (record->operand_count > 1) ? 
record->operands[1] : ADDRESS_SPACE_DEFAULT; ++ break; ++ ++ case TYPE_CODE_STRUCT_ANON: ++ case TYPE_CODE_STRUCT_NAMED: ++ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ if (record->code == TYPE_CODE_STRUCT_NAMED && !struct_name) ++ { ++ WARN("Missing struct name before struct type %zu.\n", type_index); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ type->class = TYPE_CLASS_STRUCT; ++ ++ count = record->operand_count - 1; ++ if (vkd3d_object_range_overflow(sizeof(type->u.struc), count, sizeof(type->u.struc->elem_types[0])) ++ || !(type->u.struc = vkd3d_malloc(offsetof(struct sm6_struct_info, elem_types[count])))) ++ { ++ ERR("Failed to allocate struct element types.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if (record->operands[0]) ++ FIXME("Ignoring struct packed attribute.\n"); ++ ++ type->u.struc->elem_count = count; ++ for (j = 0; j < count; ++j) ++ { ++ if ((type_id = record->operands[j + 1]) >= type_count) ++ { ++ WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); ++ vkd3d_free(type->u.struc); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ type->u.struc->elem_types[j] = &sm6->types[type_id]; ++ } ++ ++ if (record->code == TYPE_CODE_STRUCT_ANON) ++ { ++ type->u.struc->name = NULL; ++ break; ++ } ++ ++ type->u.struc->name = struct_name; ++ struct_name = NULL; ++ break; ++ ++ case TYPE_CODE_STRUCT_NAME: ++ if (!(struct_name = dxil_record_to_string(record, 0))) ++ { ++ ERR("Failed to allocate struct name.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ if (!struct_name[0]) ++ WARN("Struct name is empty for type %zu.\n", type_index); ++ continue; ++ ++ case TYPE_CODE_VOID: ++ dxil_record_validate_operand_max_count(record, 0, sm6); ++ type->class = TYPE_CLASS_VOID; ++ break; ++ ++ default: ++ FIXME("Unhandled type %u at index %zu.\n", record->code, type_index); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++sm6->type_count; ++ } ++ ++ assert(sm6->type_count == 
type_count); ++ ++ if (struct_name) ++ { ++ WARN("Unused struct name %s.\n", struct_name); ++ vkd3d_free(struct_name); ++ } ++ ++ return VKD3D_OK; ++} ++ ++static inline bool sm6_type_is_void(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_VOID; ++} ++ ++static inline bool sm6_type_is_integer(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_INTEGER; ++} ++ ++static inline bool sm6_type_is_i8(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_INTEGER && type->u.width == 8; ++} ++ ++static inline bool sm6_type_is_i32(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_INTEGER && type->u.width == 32; ++} ++ ++static inline bool sm6_type_is_floating_point(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_FLOAT; ++} ++ ++static inline bool sm6_type_is_numeric(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_INTEGER || type->class == TYPE_CLASS_FLOAT; ++} ++ ++static inline bool sm6_type_is_pointer(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_POINTER; ++} ++ ++static bool sm6_type_is_numeric_aggregate(const struct sm6_type *type) ++{ ++ unsigned int i; ++ ++ switch (type->class) ++ { ++ case TYPE_CLASS_ARRAY: ++ case TYPE_CLASS_VECTOR: ++ return sm6_type_is_numeric(type->u.array.elem_type); ++ ++ case TYPE_CLASS_STRUCT: ++ /* Do not handle nested structs. Support can be added if they show up. 
*/ ++ for (i = 0; i < type->u.struc->elem_count; ++i) ++ if (!sm6_type_is_numeric(type->u.struc->elem_types[i])) ++ return false; ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++static inline bool sm6_type_is_struct(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_STRUCT; ++} ++ ++static inline bool sm6_type_is_function(const struct sm6_type *type) ++{ ++ return type->class == TYPE_CLASS_FUNCTION; ++} ++ ++static inline bool sm6_type_is_function_pointer(const struct sm6_type *type) ++{ ++ return sm6_type_is_pointer(type) && sm6_type_is_function(type->u.pointer.type); ++} ++ ++static inline bool sm6_type_is_handle(const struct sm6_type *type) ++{ ++ return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.Handle"); ++} ++ ++static inline const struct sm6_type *sm6_type_get_element_type(const struct sm6_type *type) ++{ ++ return (type->class == TYPE_CLASS_ARRAY || type->class == TYPE_CLASS_VECTOR) ? type->u.array.elem_type : type; ++} ++ ++static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type *type, ++ enum bitcode_address_space addr_space, struct sm6_parser *sm6) ++{ ++ size_t i, start = type - sm6->types; ++ const struct sm6_type *pointer_type; ++ ++ /* DXC seems usually to place the pointer type immediately after its pointee. */ ++ for (i = (start + 1) % sm6->type_count; i != start; i = (i + 1) % sm6->type_count) ++ { ++ pointer_type = &sm6->types[i]; ++ if (sm6_type_is_pointer(pointer_type) && pointer_type->u.pointer.type == type ++ && pointer_type->u.pointer.addr_space == addr_space) ++ return pointer_type; ++ } ++ ++ return NULL; ++} ++ ++/* Never returns null for elem_idx 0. 
*/ ++static const struct sm6_type *sm6_type_get_scalar_type(const struct sm6_type *type, unsigned int elem_idx) ++{ ++ switch (type->class) ++ { ++ case TYPE_CLASS_ARRAY: ++ case TYPE_CLASS_VECTOR: ++ if (elem_idx >= type->u.array.count) ++ return NULL; ++ return sm6_type_get_scalar_type(type->u.array.elem_type, 0); ++ ++ case TYPE_CLASS_POINTER: ++ return sm6_type_get_scalar_type(type->u.pointer.type, 0); ++ ++ case TYPE_CLASS_STRUCT: ++ if (elem_idx >= type->u.struc->elem_count) ++ return NULL; ++ return sm6_type_get_scalar_type(type->u.struc->elem_types[elem_idx], 0); ++ ++ default: ++ return type; ++ } ++} ++ ++static const struct sm6_type *sm6_parser_get_type(struct sm6_parser *sm6, uint64_t type_id) ++{ ++ if (type_id >= sm6->type_count) ++ { ++ WARN("Invalid type index %"PRIu64" at %zu.\n", type_id, sm6->value_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID, ++ "DXIL type id %"PRIu64" is invalid.", type_id); ++ return NULL; ++ } ++ return &sm6->types[type_id]; ++} ++ ++static int global_symbol_compare(const void *a, const void *b) ++{ ++ return vkd3d_u32_compare(((const struct sm6_symbol *)a)->id, ((const struct sm6_symbol *)b)->id); ++} ++ ++static enum vkd3d_result sm6_parser_symtab_init(struct sm6_parser *sm6) ++{ ++ const struct dxil_record *record; ++ const struct dxil_block *block; ++ struct sm6_symbol *symbol; ++ size_t i, count; ++ bool is_unique; ++ ++ sm6->p.location.line = 0; ++ sm6->p.location.column = 0; ++ ++ if (!(block = sm6_parser_get_level_one_block(sm6, VALUE_SYMTAB_BLOCK, &is_unique))) ++ { ++ /* There should always be at least one symbol: the name of the entry point function. 
*/ ++ WARN("No value symtab block found.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (!is_unique) ++ FIXME("Ignoring extra value symtab block(s).\n"); ++ ++ sm6->p.location.line = block->id; ++ ++ for (i = 0, count = 0; i < block->record_count; ++i) ++ count += block->records[i]->code == VST_CODE_ENTRY; ++ ++ if (!(sm6->global_symbols = vkd3d_calloc(count, sizeof(*sm6->global_symbols)))) ++ { ++ ERR("Failed to allocate global symbols.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ for (i = 0; i < block->record_count; ++i) ++ { ++ sm6->p.location.column = i; ++ record = block->records[i]; ++ ++ if (record->code != VST_CODE_ENTRY) ++ { ++ FIXME("Unhandled symtab code %u.\n", record->code); ++ continue; ++ } ++ if (!dxil_record_validate_operand_min_count(record, 1, sm6)) ++ continue; ++ ++ symbol = &sm6->global_symbols[sm6->global_symbol_count]; ++ symbol->id = record->operands[0]; ++ if (!(symbol->name = dxil_record_to_string(record, 1))) ++ { ++ ERR("Failed to allocate symbol name.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++sm6->global_symbol_count; ++ } ++ ++ sm6->p.location.column = block->record_count; ++ ++ qsort(sm6->global_symbols, sm6->global_symbol_count, sizeof(*sm6->global_symbols), global_symbol_compare); ++ for (i = 1; i < sm6->global_symbol_count; ++i) ++ { ++ if (sm6->global_symbols[i].id == sm6->global_symbols[i - 1].id) ++ { ++ WARN("Invalid duplicate symbol id %u.\n", sm6->global_symbols[i].id); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ ++static const char *sm6_parser_get_global_symbol_name(const struct sm6_parser *sm6, size_t id) ++{ ++ size_t i, start; ++ ++ /* id == array index is normally true */ ++ i = start = id % sm6->global_symbol_count; ++ do ++ { ++ if (sm6->global_symbols[i].id == id) ++ return sm6->global_symbols[i].name; ++ i = (i + 1) % sm6->global_symbol_count; ++ } while (i != start); ++ ++ return NULL; ++} ++ ++static unsigned int register_get_uint_value(const struct 
vkd3d_shader_register *reg) ++{ ++ if (!register_is_constant(reg) || !data_type_is_integer(reg->data_type)) ++ return UINT_MAX; ++ ++ if (reg->immconst_type == VKD3D_IMMCONST_VEC4) ++ WARN("Returning vec4.x.\n"); ++ ++ if (reg->type == VKD3DSPR_IMMCONST64) ++ { ++ if (reg->u.immconst_uint64[0] > UINT_MAX) ++ FIXME("Truncating 64-bit value.\n"); ++ return reg->u.immconst_uint64[0]; ++ } ++ ++ return reg->u.immconst_uint[0]; ++} ++ ++static inline bool sm6_value_is_function_dcl(const struct sm6_value *value) ++{ ++ return value->value_type == VALUE_TYPE_FUNCTION; ++} ++ ++static inline bool sm6_value_is_dx_intrinsic_dcl(const struct sm6_value *fn) ++{ ++ assert(sm6_value_is_function_dcl(fn)); ++ return fn->u.function.is_prototype && !strncmp(fn->u.function.name, "dx.op.", 6); ++} ++ ++static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_parser *sm6) ++{ ++ assert(sm6->value_count < sm6->value_capacity); ++ return &sm6->values[sm6->value_count]; ++} ++ ++static inline bool sm6_value_is_register(const struct sm6_value *value) ++{ ++ return value->value_type == VALUE_TYPE_REG; ++} ++ ++static inline bool sm6_value_is_constant(const struct sm6_value *value) ++{ ++ return sm6_value_is_register(value) && register_is_constant(&value->u.reg); ++} ++ ++static inline bool sm6_value_is_undef(const struct sm6_value *value) ++{ ++ return sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF; ++} ++ ++static inline unsigned int sm6_value_get_constant_uint(const struct sm6_value *value) ++{ ++ if (!sm6_value_is_constant(value)) ++ return UINT_MAX; ++ return register_get_uint_value(&value->u.reg); ++} ++ ++static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_shader_instruction *ins, ++ unsigned int count, struct sm6_parser *sm6) ++{ ++ struct vkd3d_shader_src_param *params = shader_parser_get_src_params(&sm6->p, count); ++ if (!params) ++ { ++ ERR("Failed to allocate src params.\n"); ++ 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory allocating instruction src paramaters."); ++ return NULL; ++ } ++ ins->src = params; ++ ins->src_count = count; ++ return params; ++} ++ ++static struct vkd3d_shader_dst_param *instruction_dst_params_alloc(struct vkd3d_shader_instruction *ins, ++ unsigned int count, struct sm6_parser *sm6) ++{ ++ struct vkd3d_shader_dst_param *params = shader_parser_get_dst_params(&sm6->p, count); ++ if (!params) ++ { ++ ERR("Failed to allocate dst params.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory allocating instruction dst paramaters."); ++ return NULL; ++ } ++ ins->dst = params; ++ ins->dst_count = count; ++ return params; ++} ++ ++static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type *type) ++{ ++ if (type->class == TYPE_CLASS_INTEGER) ++ { ++ switch (type->u.width) ++ { ++ case 8: ++ return VKD3D_DATA_UINT8; ++ case 32: ++ return VKD3D_DATA_UINT; ++ default: ++ FIXME("Unhandled width %u.\n", type->u.width); ++ return VKD3D_DATA_UINT; ++ } ++ } ++ else if (type->class == TYPE_CLASS_FLOAT) ++ { ++ switch (type->u.width) ++ { ++ case 32: ++ return VKD3D_DATA_FLOAT; ++ case 64: ++ return VKD3D_DATA_DOUBLE; ++ default: ++ FIXME("Unhandled width %u.\n", type->u.width); ++ return VKD3D_DATA_FLOAT; ++ } ++ } ++ ++ FIXME("Unhandled type %u.\n", type->class); ++ return VKD3D_DATA_UINT; ++} ++ ++static inline void dst_param_init_scalar(struct vkd3d_shader_dst_param *param, unsigned int component_idx) ++{ ++ param->write_mask = 1u << component_idx; ++ param->modifiers = 0; ++ param->shift = 0; ++} ++ ++static inline void src_param_init(struct vkd3d_shader_src_param *param) ++{ ++ param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ param->modifiers = VKD3DSPSM_NONE; ++} ++ ++static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src) ++{ ++ src_param_init(param); ++ 
param->reg = src->u.reg; ++} ++ ++static void register_address_init(struct vkd3d_shader_register *reg, const struct sm6_value *address, ++ unsigned int idx, struct sm6_parser *sm6) ++{ ++ assert(idx < ARRAY_SIZE(reg->idx)); ++ if (sm6_value_is_constant(address)) ++ { ++ reg->idx[idx].offset = sm6_value_get_constant_uint(address); ++ } ++ else if (sm6_value_is_undef(address)) ++ { ++ reg->idx[idx].offset = 0; ++ } ++ else ++ { ++ struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&sm6->p, 1); ++ if (rel_addr) ++ src_param_init_from_value(rel_addr, address); ++ reg->idx[idx].offset = 0; ++ reg->idx[idx].rel_addr = rel_addr; ++ } ++} ++ ++/* Recurse through the block tree while maintaining a current value count. The current ++ * count is the sum of the global count plus all declarations within the current function. ++ * Store into value_capacity the highest count seen. */ ++static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, ++ const struct dxil_block *block, size_t value_count) ++{ ++ size_t i, old_value_count = value_count; ++ ++ if (block->id == MODULE_BLOCK) ++ value_count = size_add_with_overflow_check(value_count, dxil_block_compute_module_decl_count(block)); ++ ++ for (i = 0; i < block->child_block_count; ++i) ++ value_count = sm6_parser_compute_max_value_count(sm6, block->child_blocks[i], value_count); ++ ++ switch (block->id) ++ { ++ case CONSTANTS_BLOCK: ++ /* Function local constants are contained in a child block of the function block. */ ++ value_count = size_add_with_overflow_check(value_count, dxil_block_compute_constants_count(block)); ++ break; ++ case FUNCTION_BLOCK: ++ /* A function must start with a block count, which emits no value. This formula is likely to ++ * overestimate the value count somewhat, but this should be no problem. 
*/ ++ value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1); ++ sm6->value_capacity = max(sm6->value_capacity, value_count); ++ sm6->functions[sm6->function_count].value_count = value_count; ++ /* The value count returns to its previous value after handling a function. */ ++ if (value_count < SIZE_MAX) ++ value_count = old_value_count; ++ break; ++ default: ++ break; ++ } ++ ++ return value_count; ++} ++ ++static size_t sm6_parser_get_value_index(struct sm6_parser *sm6, uint64_t idx) ++{ ++ size_t i; ++ ++ /* The value relative index is 32 bits. */ ++ if (idx > UINT32_MAX) ++ WARN("Ignoring upper 32 bits of relative index.\n"); ++ i = (uint32_t)sm6->value_count - (uint32_t)idx; ++ ++ /* This may underflow to produce a forward reference, but it must not exceeed the final value count. */ ++ if (i >= sm6->cur_max_value) ++ { ++ WARN("Invalid value index %"PRIx64" at %zu.\n", idx, sm6->value_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid value relative index %u.", (unsigned int)idx); ++ return SIZE_MAX; ++ } ++ if (i == sm6->value_count) ++ { ++ WARN("Invalid value self-reference at %zu.\n", sm6->value_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Invalid value self-reference."); ++ return SIZE_MAX; ++ } ++ ++ return i; ++} ++ ++static size_t sm6_parser_get_value_idx_by_ref(struct sm6_parser *sm6, const struct dxil_record *record, ++ const struct sm6_type *fwd_type, unsigned int *rec_idx) ++{ ++ unsigned int idx; ++ uint64_t val_ref; ++ size_t operand; ++ ++ idx = *rec_idx; ++ if (!dxil_record_validate_operand_min_count(record, idx + 1, sm6)) ++ return SIZE_MAX; ++ val_ref = record->operands[idx++]; ++ ++ operand = sm6_parser_get_value_index(sm6, val_ref); ++ if (operand == SIZE_MAX) ++ return SIZE_MAX; ++ ++ if (operand >= sm6->value_count) ++ { ++ if (!fwd_type) ++ { ++ /* Forward references are followed by a type id unless an earlier 
operand set the type, ++ * or it is contained in a function declaration. */ ++ if (!dxil_record_validate_operand_min_count(record, idx + 1, sm6)) ++ return SIZE_MAX; ++ if (!(fwd_type = sm6_parser_get_type(sm6, record->operands[idx++]))) ++ return SIZE_MAX; ++ } ++ FIXME("Forward value references are not supported yet.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Unsupported value forward reference."); ++ return SIZE_MAX; ++ } ++ *rec_idx = idx; ++ ++ return operand; ++} ++ ++static const struct sm6_value *sm6_parser_get_value_by_ref(struct sm6_parser *sm6, ++ const struct dxil_record *record, const struct sm6_type *type, unsigned int *rec_idx) ++{ ++ size_t operand = sm6_parser_get_value_idx_by_ref(sm6, record, type, rec_idx); ++ return operand == SIZE_MAX ? NULL : &sm6->values[operand]; ++} ++ ++static bool sm6_parser_declare_function(struct sm6_parser *sm6, const struct dxil_record *record) ++{ ++ const unsigned int max_count = 15; ++ const struct sm6_type *ret_type; ++ struct sm6_value *fn; ++ unsigned int i, j; ++ ++ if (!dxil_record_validate_operand_count(record, 8, max_count, sm6)) ++ return false; ++ ++ fn = sm6_parser_get_current_value(sm6); ++ fn->value_type = VALUE_TYPE_FUNCTION; ++ if (!(fn->u.function.name = sm6_parser_get_global_symbol_name(sm6, sm6->value_count))) ++ { ++ WARN("Missing symbol name for function %zu.\n", sm6->value_count); ++ fn->u.function.name = ""; ++ } ++ ++ if (!(fn->type = sm6_parser_get_type(sm6, record->operands[0]))) ++ return false; ++ if (!sm6_type_is_function(fn->type)) ++ { ++ WARN("Type is not a function.\n"); ++ return false; ++ } ++ ret_type = fn->type->u.function->ret_type; ++ ++ if (!(fn->type = sm6_type_get_pointer_to_type(fn->type, ADDRESS_SPACE_DEFAULT, sm6))) ++ { ++ WARN("Failed to get pointer type for type %u.\n", fn->type->class); ++ return false; ++ } ++ ++ if (record->operands[1]) ++ WARN("Ignoring calling convention %#"PRIx64".\n", record->operands[1]); ++ ++ 
fn->u.function.is_prototype = !!record->operands[2]; ++ ++ if (record->operands[3]) ++ WARN("Ignoring linkage %#"PRIx64".\n", record->operands[3]); ++ ++ if (record->operands[4] > UINT_MAX) ++ WARN("Invalid attributes id %#"PRIx64".\n", record->operands[4]); ++ /* 1-based index. */ ++ if ((fn->u.function.attribs_id = record->operands[4])) ++ TRACE("Ignoring function attributes.\n"); ++ ++ /* These always seem to be zero. */ ++ for (i = 5, j = 0; i < min(record->operand_count, max_count); ++i) ++ j += !!record->operands[i]; ++ if (j) ++ WARN("Ignoring %u operands.\n", j); ++ ++ if (sm6_value_is_dx_intrinsic_dcl(fn) && !sm6_type_is_void(ret_type) && !sm6_type_is_numeric(ret_type) ++ && !sm6_type_is_numeric_aggregate(ret_type) && !sm6_type_is_handle(ret_type)) ++ { ++ WARN("Unexpected return type for dx intrinsic function '%s'.\n", fn->u.function.name); ++ } ++ ++ ++sm6->value_count; ++ ++ return true; ++} ++ ++static inline uint64_t decode_rotated_signed_value(uint64_t value) ++{ ++ if (value != 1) ++ { ++ bool neg = value & 1; ++ value >>= 1; ++ return neg ? 
-value : value; ++ } ++ return value << 63; ++} ++ ++static inline float bitcast_uint64_to_float(uint64_t value) ++{ ++ union ++ { ++ uint32_t uint32_value; ++ float float_value; ++ } u; ++ ++ u.uint32_value = value; ++ return u.float_value; ++} ++ ++static inline double bitcast_uint64_to_double(uint64_t value) ++{ ++ union ++ { ++ uint64_t uint64_value; ++ double double_value; ++ } u; ++ ++ u.uint64_value = value; ++ return u.double_value; ++} ++ ++static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) ++{ ++ enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; ++ const struct sm6_type *type, *elem_type; ++ enum vkd3d_data_type reg_data_type; ++ const struct dxil_record *record; ++ struct sm6_value *dst; ++ size_t i, value_idx; ++ uint64_t value; ++ ++ for (i = 0, type = NULL; i < block->record_count; ++i) ++ { ++ sm6->p.location.column = i; ++ record = block->records[i]; ++ value_idx = sm6->value_count; ++ ++ if (record->code == CST_CODE_SETTYPE) ++ { ++ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if (!(type = sm6_parser_get_type(sm6, record->operands[0]))) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ elem_type = sm6_type_get_element_type(type); ++ if (sm6_type_is_numeric(elem_type)) ++ { ++ reg_data_type = vkd3d_data_type_from_sm6_type(elem_type); ++ reg_type = elem_type->u.width > 32 ? 
VKD3DSPR_IMMCONST64 : VKD3DSPR_IMMCONST; ++ } ++ else ++ { ++ reg_data_type = VKD3D_DATA_UNUSED; ++ reg_type = VKD3DSPR_INVALID; ++ } ++ ++ if (i == block->record_count - 1) ++ WARN("Unused SETTYPE record.\n"); ++ ++ continue; ++ } ++ ++ if (!type) ++ { ++ WARN("Constant record %zu has no type.\n", value_idx); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ dst = sm6_parser_get_current_value(sm6); ++ dst->type = type; ++ dst->value_type = VALUE_TYPE_REG; ++ dst->u.reg.type = reg_type; ++ dst->u.reg.immconst_type = VKD3D_IMMCONST_SCALAR; ++ dst->u.reg.data_type = reg_data_type; ++ ++ switch (record->code) ++ { ++ case CST_CODE_NULL: ++ /* Register constant data is already zero-filled. */ ++ break; ++ ++ case CST_CODE_INTEGER: ++ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if (!sm6_type_is_integer(type)) ++ { ++ WARN("Invalid integer of non-integer type %u at constant idx %zu.\n", type->class, value_idx); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ value = decode_rotated_signed_value(record->operands[0]); ++ if (type->u.width <= 32) ++ dst->u.reg.u.immconst_uint[0] = value & ((1ull << type->u.width) - 1); ++ else ++ dst->u.reg.u.immconst_uint64[0] = value; ++ ++ break; ++ ++ case CST_CODE_FLOAT: ++ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if (!sm6_type_is_floating_point(type)) ++ { ++ WARN("Invalid float of non-fp type %u at constant idx %zu.\n", type->class, value_idx); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (type->u.width == 16) ++ FIXME("Half float type is not supported yet.\n"); ++ else if (type->u.width == 32) ++ dst->u.reg.u.immconst_float[0] = bitcast_uint64_to_float(record->operands[0]); ++ else if (type->u.width == 64) ++ dst->u.reg.u.immconst_double[0] = bitcast_uint64_to_double(record->operands[0]); ++ else ++ vkd3d_unreachable(); ++ ++ break; ++ ++ case CST_CODE_DATA: ++ WARN("Unhandled constant array.\n"); ++ 
break; ++ ++ case CST_CODE_UNDEF: ++ dxil_record_validate_operand_max_count(record, 0, sm6); ++ dst->u.reg.type = VKD3DSPR_UNDEF; ++ /* Mark as explicitly undefined, not the result of a missing constant code or instruction. */ ++ dst->is_undefined = true; ++ break; ++ ++ default: ++ FIXME("Unhandled constant code %u.\n", record->code); ++ dst->u.reg.type = VKD3DSPR_UNDEF; ++ break; ++ } ++ ++ ++sm6->value_count; ++ } ++ ++ return VKD3D_OK; ++} ++ ++static struct vkd3d_shader_instruction *sm6_parser_require_space(struct sm6_parser *sm6, size_t extra) ++{ ++ if (!shader_instruction_array_reserve(&sm6->p.instructions, sm6->p.instructions.count + extra)) ++ { ++ ERR("Failed to allocate instruction.\n"); ++ return NULL; ++ } ++ return &sm6->p.instructions.elements[sm6->p.instructions.count]; ++} ++ ++/* Space should be reserved before calling this. It is intended to require no checking of the returned pointer. */ ++static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_parser *sm6, ++ enum vkd3d_shader_opcode handler_idx) ++{ ++ struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); ++ assert(ins); ++ shader_instruction_init(ins, handler_idx); ++ ++sm6->p.instructions.count; ++ return ins; ++} ++ ++static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) ++{ ++ const struct dxil_block *block = &sm6->root_block; ++ const struct dxil_record *record; ++ uint64_t version; ++ size_t i; ++ ++ sm6->p.location.line = block->id; ++ sm6->p.location.column = 0; ++ ++ for (i = 0; i < block->record_count; ++i) ++ { ++ sm6->p.location.column = i; ++ record = block->records[i]; ++ switch (record->code) ++ { ++ case MODULE_CODE_FUNCTION: ++ if (!sm6_parser_declare_function(sm6, record)) ++ { ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL, ++ "A DXIL function declaration is invalid."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ break; ++ ++ case MODULE_CODE_GLOBALVAR: ++ FIXME("Global 
variables are not implemented yet.\n"); ++ break; ++ ++ case MODULE_CODE_VERSION: ++ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ if ((version = record->operands[0]) != 1) ++ { ++ FIXME("Unsupported format version %#"PRIx64".\n", version); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT, ++ "Bitcode format version %#"PRIx64" is unsupported.", version); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ break; ++ ++ default: ++ break; ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ ++static void dst_param_io_init(struct vkd3d_shader_dst_param *param, ++ const struct signature_element *e, enum vkd3d_shader_register_type reg_type) ++{ ++ enum vkd3d_shader_component_type component_type; ++ ++ param->write_mask = e->mask; ++ param->modifiers = 0; ++ param->shift = 0; ++ /* DXIL types do not have signedness. Load signed elements as unsigned. */ ++ component_type = e->component_type == VKD3D_SHADER_COMPONENT_INT ? 
VKD3D_SHADER_COMPONENT_UINT : e->component_type; ++ shader_register_init(¶m->reg, reg_type, vkd3d_data_type_from_component_type(component_type), 0); ++} ++ ++static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, ++ enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) ++{ ++ struct vkd3d_shader_dst_param *param; ++ const struct signature_element *e; ++ unsigned int i; ++ ++ for (i = 0; i < s->element_count; ++i) ++ { ++ e = &s->elements[i]; ++ ++ param = ¶ms[i]; ++ dst_param_io_init(param, e, reg_type); ++ param->reg.idx[0].offset = i; ++ param->reg.idx_count = 1; ++ } ++} ++ ++static void sm6_parser_emit_signature(struct sm6_parser *sm6, const struct shader_signature *s, ++ enum vkd3d_shader_opcode handler_idx, enum vkd3d_shader_opcode siv_handler_idx, ++ struct vkd3d_shader_dst_param *params) ++{ ++ struct vkd3d_shader_instruction *ins; ++ struct vkd3d_shader_dst_param *param; ++ const struct signature_element *e; ++ unsigned int i; ++ ++ for (i = 0; i < s->element_count; ++i) ++ { ++ e = &s->elements[i]; ++ ++ /* Do not check e->used_mask because in some cases it is zero for used elements. ++ * TODO: scan ahead for used I/O elements. */ ++ ++ if (e->sysval_semantic != VKD3D_SHADER_SV_NONE && e->sysval_semantic != VKD3D_SHADER_SV_TARGET) ++ { ++ ins = sm6_parser_add_instruction(sm6, siv_handler_idx); ++ param = &ins->declaration.register_semantic.reg; ++ ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); ++ } ++ else ++ { ++ ins = sm6_parser_add_instruction(sm6, handler_idx); ++ param = &ins->declaration.dst; ++ } ++ ++ *param = params[i]; ++ } ++} ++ ++static void sm6_parser_init_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) ++{ ++ sm6_parser_init_signature(sm6, output_signature, ++ (sm6->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL) ? 
VKD3DSPR_COLOROUT : VKD3DSPR_OUTPUT, ++ sm6->output_params); ++} ++ ++static void sm6_parser_emit_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) ++{ ++ sm6_parser_emit_signature(sm6, output_signature, VKD3DSIH_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT_SIV, sm6->output_params); ++} ++ ++static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) ++{ ++ size_t i, count = sm6->function_count; ++ ++ for (i = 0; i < sm6->value_count; ++i) ++ { ++ if (sm6_type_is_function_pointer(sm6->values[i].type) && !sm6->values[i].u.function.is_prototype && !count--) ++ break; ++ } ++ if (i == sm6->value_count) ++ return NULL; ++ ++ ++sm6->function_count; ++ return &sm6->values[i]; ++} ++ ++static struct sm6_block *sm6_block_create() ++{ ++ struct sm6_block *block = vkd3d_calloc(1, sizeof(*block)); ++ return block; ++} ++ ++static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, struct sm6_block *code_block, ++ enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct vkd3d_shader_instruction *ins) ++{ ++ struct vkd3d_shader_src_param *src_param; ++ struct vkd3d_shader_dst_param *dst_param; ++ const struct shader_signature *signature; ++ unsigned int row_index, column_index; ++ const struct signature_element *e; ++ const struct sm6_value *value; ++ ++ row_index = sm6_value_get_constant_uint(operands[0]); ++ column_index = sm6_value_get_constant_uint(operands[2]); ++ ++ signature = &sm6->p.shader_desc.output_signature; ++ if (row_index >= signature->element_count) ++ { ++ WARN("Invalid row index %u.\n", row_index); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid output row index %u.", row_index); ++ return; ++ } ++ e = &signature->elements[row_index]; ++ ++ if (column_index >= VKD3D_VEC4_SIZE) ++ { ++ WARN("Invalid column index %u.\n", column_index); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid output column 
index %u.", column_index); ++ return; ++ } ++ ++ value = operands[3]; ++ if (!sm6_value_is_register(value)) ++ { ++ WARN("Source value is not a register.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Expected store operation source to be a register."); ++ return; ++ } ++ ++ shader_instruction_init(ins, VKD3DSIH_MOV); ++ ++ if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6))) ++ return; ++ dst_param_init_scalar(dst_param, column_index); ++ dst_param->reg = sm6->output_params[row_index].reg; ++ if (e->register_count > 1) ++ register_address_init(&dst_param->reg, operands[1], 0, sm6); ++ ++ if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ src_param_init_from_value(src_param, value); ++} ++ ++struct sm6_dx_opcode_info ++{ ++ const char ret_type; ++ const char *operand_info; ++ void (*handler)(struct sm6_parser *, struct sm6_block *, enum dx_intrinsic_opcode, ++ const struct sm6_value **, struct vkd3d_shader_instruction *); ++}; ++ ++/* ++ 8 -> int8 ++ i -> int32 ++ v -> void ++ o -> overloaded ++ */ ++static const struct sm6_dx_opcode_info sm6_dx_op_table[] = ++{ ++ [DX_STORE_OUTPUT ] = {'v', "ii8o", sm6_parser_emit_dx_store_output}, ++}; ++ ++static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_type *type, char info_type) ++{ ++ switch (info_type) ++ { ++ case 0: ++ FIXME("Invalid operand count.\n"); ++ return false; ++ case '8': ++ return sm6_type_is_i8(type); ++ case 'i': ++ return sm6_type_is_i32(type); ++ case 'v': ++ return !type; ++ case 'o': ++ /* TODO: some type checking may be possible */ ++ return true; ++ default: ++ FIXME("Unhandled operand code '%c'.\n", info_type); ++ return false; ++ } ++} ++ ++static bool sm6_parser_validate_dx_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const char *name, ++ const struct sm6_value **operands, unsigned int operand_count, struct sm6_value *dst) ++{ ++ const struct sm6_dx_opcode_info *info; ++ unsigned int i; ++ 
++ info = &sm6_dx_op_table[op]; ++ ++ if (!sm6_parser_validate_operand_type(sm6, dst->type, info->ret_type)) ++ { ++ WARN("Failed to validate return type for dx intrinsic id %u, '%s'.\n", op, name); ++ /* Return type validation failure is not so critical. We only need to set ++ * a data type for the SSA result. */ ++ } ++ ++ for (i = 0; i < operand_count; ++i) ++ { ++ const struct sm6_value *value = operands[i]; ++ if (!sm6_value_is_register(value) || !sm6_parser_validate_operand_type(sm6, value->type, info->operand_info[i])) ++ { ++ WARN("Failed to validate operand %u for dx intrinsic id %u, '%s'.\n", i + 1, op, name); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Operand %u for call to dx intrinsic function '%s' is invalid.", i + 1, name); ++ return false; ++ } ++ } ++ if (info->operand_info[operand_count]) ++ { ++ WARN("Missing operands for dx intrinsic id %u, '%s'.\n", op, name); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, ++ "Call to dx intrinsic function '%s' has missing operands.", name); ++ return false; ++ } ++ ++ return true; ++} ++ ++static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shader_instruction *ins, ++ struct sm6_value *dst) ++{ ++ const struct sm6_type *type; ++ ++ ins->handler_idx = VKD3DSIH_NOP; ++ ++ if (!dst->type) ++ return; ++ ++ type = sm6_type_get_scalar_type(dst->type, 0); ++ shader_register_init(&dst->u.reg, VKD3DSPR_UNDEF, vkd3d_data_type_from_sm6_type(type), 0); ++ /* dst->is_undefined is not set here because it flags only explicitly undefined values. 
*/ ++} ++ ++static void sm6_parser_decode_dx_op(struct sm6_parser *sm6, struct sm6_block *code_block, enum dx_intrinsic_opcode op, ++ const char *name, const struct sm6_value **operands, unsigned int operand_count, ++ struct vkd3d_shader_instruction *ins, struct sm6_value *dst) ++{ ++ if (op >= ARRAY_SIZE(sm6_dx_op_table) || !sm6_dx_op_table[op].operand_info) ++ { ++ FIXME("Unhandled dx intrinsic function id %u, '%s'.\n", op, name); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNHANDLED_INTRINSIC, ++ "Call to intrinsic function %s is unhandled.", name); ++ sm6_parser_emit_unhandled(sm6, ins, dst); ++ return; ++ } ++ ++ if (sm6_parser_validate_dx_op(sm6, op, name, operands, operand_count, dst)) ++ sm6_dx_op_table[op].handler(sm6, code_block, op, operands, ins); ++ else ++ sm6_parser_emit_unhandled(sm6, ins, dst); ++} ++ ++static void sm6_parser_emit_call(struct sm6_parser *sm6, const struct dxil_record *record, ++ struct sm6_block *code_block, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) ++{ ++ const struct sm6_value *operands[DXIL_OP_MAX_OPERANDS]; ++ const struct sm6_value *fn_value, *op_value; ++ unsigned int i = 1, j, operand_count; ++ const struct sm6_type *type = NULL; ++ uint64_t call_conv; ++ ++ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) ++ return; ++ ++ /* TODO: load the 1-based attributes index from record->operands[0] and validate against attribute count. 
*/ ++ ++ if ((call_conv = record->operands[i++]) & CALL_CONV_FLAG_EXPLICIT_TYPE) ++ type = sm6_parser_get_type(sm6, record->operands[i++]); ++ if (call_conv &= ~CALL_CONV_FLAG_EXPLICIT_TYPE) ++ WARN("Ignoring calling convention %#"PRIx64".\n", call_conv); ++ ++ if (!(fn_value = sm6_parser_get_value_by_ref(sm6, record, NULL, &i))) ++ return; ++ if (!sm6_value_is_function_dcl(fn_value)) ++ { ++ WARN("Function target value is not a function declaration.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Function call target value is not a function declaration."); ++ return; ++ } ++ ++ if (type && type != fn_value->type->u.pointer.type) ++ WARN("Explicit call type does not match function type.\n"); ++ type = fn_value->type->u.pointer.type; ++ ++ if (!sm6_type_is_void(type->u.function->ret_type)) ++ dst->type = type->u.function->ret_type; ++ ++ operand_count = type->u.function->param_count; ++ if (operand_count > ARRAY_SIZE(operands)) ++ { ++ WARN("Ignoring %zu operands.\n", operand_count - ARRAY_SIZE(operands)); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ "Ignoring %zu operands for function call.", operand_count - ARRAY_SIZE(operands)); ++ operand_count = ARRAY_SIZE(operands); ++ } ++ ++ for (j = 0; j < operand_count; ++j) ++ { ++ if (!(operands[j] = sm6_parser_get_value_by_ref(sm6, record, type->u.function->param_types[j], &i))) ++ return; ++ } ++ if ((j = record->operand_count - i)) ++ { ++ WARN("Ignoring %u operands beyond the function parameter list.\n", j); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ "Ignoring %u function call operands beyond the parameter list.", j); ++ } ++ ++ if (!fn_value->u.function.is_prototype) ++ { ++ FIXME("Unhandled call to local function.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Call to a local function is unsupported."); ++ return; ++ } ++ if 
(!sm6_value_is_dx_intrinsic_dcl(fn_value)) ++ WARN("External function is not a dx intrinsic.\n"); ++ ++ if (!operand_count) ++ { ++ WARN("Missing dx intrinsic function id.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, ++ "The id for a dx intrinsic function is missing."); ++ return; ++ } ++ ++ op_value = operands[0]; ++ if (!sm6_value_is_constant(op_value) || !sm6_type_is_integer(op_value->type)) ++ { ++ WARN("dx intrinsic function id is not a constant int.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Expected a constant integer dx intrinsic function id."); ++ return; ++ } ++ sm6_parser_decode_dx_op(sm6, code_block, register_get_uint_value(&op_value->u.reg), ++ fn_value->u.function.name, &operands[1], operand_count - 1, ins, dst); ++} ++ ++static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record *record, ++ struct sm6_block *code_block, struct vkd3d_shader_instruction *ins) ++{ ++ if (!dxil_record_validate_operand_count(record, 0, 1, sm6)) ++ return; ++ ++ if (record->operand_count) ++ FIXME("Non-void return is not implemented.\n"); ++ ++ ins->handler_idx = VKD3DSIH_NOP; ++} ++ ++static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const struct dxil_block *block, ++ struct sm6_function *function) ++{ ++ struct vkd3d_shader_instruction *ins; ++ const struct dxil_record *record; ++ bool ret_found, is_terminator; ++ struct sm6_block *code_block; ++ struct sm6_value *dst; ++ size_t i, block_idx; ++ ++ if (sm6->function_count) ++ { ++ FIXME("Multiple functions are not supported yet.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (!(function->declaration = sm6_parser_next_function_definition(sm6))) ++ { ++ WARN("Failed to find definition to match function body.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (block->record_count < 2) ++ { ++ /* It should contain at least a block count and a RET instruction. 
*/ ++ WARN("Invalid function block record count %zu.\n", block->record_count); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (block->records[0]->code != FUNC_CODE_DECLAREBLOCKS || !block->records[0]->operand_count ++ || block->records[0]->operands[0] > UINT_MAX) ++ { ++ WARN("Block count declaration not found or invalid.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!(function->block_count = block->records[0]->operands[0])) ++ { ++ WARN("Function contains no blocks.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (function->block_count > 1) ++ { ++ FIXME("Branched shaders are not supported yet.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (!(function->blocks[0] = sm6_block_create())) ++ { ++ ERR("Failed to allocate code block.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ code_block = function->blocks[0]; ++ ++ sm6->cur_max_value = function->value_count; ++ ++ for (i = 1, block_idx = 0, ret_found = false; i < block->record_count; ++i) ++ { ++ sm6->p.location.column = i; ++ ++ if (!code_block) ++ { ++ WARN("Invalid block count %zu.\n", function->block_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid block count %zu.", function->block_count); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ /* block->record_count - 1 is the instruction count, but some instructions ++ * can emit >1 IR instruction, so extra may be used. 
*/ ++ if (!vkd3d_array_reserve((void **)&code_block->instructions, &code_block->instruction_capacity, ++ max(code_block->instruction_count + 1, block->record_count), sizeof(*code_block->instructions))) ++ { ++ ERR("Failed to allocate instructions.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ ins = &code_block->instructions[code_block->instruction_count]; ++ ins->handler_idx = VKD3DSIH_INVALID; ++ ++ dst = sm6_parser_get_current_value(sm6); ++ dst->type = NULL; ++ dst->value_type = VALUE_TYPE_REG; ++ is_terminator = false; ++ ++ record = block->records[i]; ++ switch (record->code) ++ { ++ case FUNC_CODE_INST_CALL: ++ sm6_parser_emit_call(sm6, record, code_block, ins, dst); ++ break; ++ case FUNC_CODE_INST_RET: ++ sm6_parser_emit_ret(sm6, record, code_block, ins); ++ is_terminator = true; ++ ret_found = true; ++ break; ++ default: ++ FIXME("Unhandled dxil instruction %u.\n", record->code); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (sm6->p.failed) ++ return VKD3D_ERROR; ++ assert(ins->handler_idx != VKD3DSIH_INVALID); ++ ++ if (is_terminator) ++ { ++ ++block_idx; ++ code_block = (block_idx < function->block_count) ? 
function->blocks[block_idx] : NULL; ++ } ++ if (code_block) ++ code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; ++ else ++ assert(ins->handler_idx == VKD3DSIH_NOP); ++ ++ sm6->value_count += !!dst->type; ++ } ++ ++ if (!ret_found) ++ { ++ WARN("Function contains no RET instruction.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ return VKD3D_OK; ++} ++ ++static bool sm6_block_emit_instructions(struct sm6_block *block, struct sm6_parser *sm6) ++{ ++ struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, block->instruction_count + 1); ++ ++ if (!ins) ++ return false; ++ ++ memcpy(ins, block->instructions, block->instruction_count * sizeof(*block->instructions)); ++ sm6->p.instructions.count += block->instruction_count; ++ ++ sm6_parser_add_instruction(sm6, VKD3DSIH_RET); ++ ++ return true; ++} ++ ++static enum vkd3d_result sm6_parser_module_init(struct sm6_parser *sm6, const struct dxil_block *block, ++ unsigned int level) ++{ ++ size_t i, old_value_count = sm6->value_count; ++ struct sm6_function *function; ++ enum vkd3d_result ret; ++ ++ for (i = 0; i < block->child_block_count; ++i) ++ { ++ if ((ret = sm6_parser_module_init(sm6, block->child_blocks[i], level + 1)) < 0) ++ return ret; ++ } ++ ++ sm6->p.location.line = block->id; ++ sm6->p.location.column = 0; ++ ++ switch (block->id) ++ { ++ case CONSTANTS_BLOCK: ++ function = &sm6->functions[sm6->function_count]; ++ sm6->cur_max_value = function->value_count; ++ return sm6_parser_constants_init(sm6, block); ++ ++ case FUNCTION_BLOCK: ++ function = &sm6->functions[sm6->function_count]; ++ if ((ret = sm6_parser_function_init(sm6, block, function)) < 0) ++ return ret; ++ /* The value index returns to its previous value after handling a function. It's usually nonzero ++ * at the start because of global constants/variables/function declarations. Function constants ++ * occur in a child block, so value_count is already saved before they are emitted. 
*/ ++ memset(&sm6->values[old_value_count], 0, (sm6->value_count - old_value_count) * sizeof(*sm6->values)); ++ sm6->value_count = old_value_count; ++ break; ++ ++ case BLOCKINFO_BLOCK: ++ case MODULE_BLOCK: ++ case PARAMATTR_BLOCK: ++ case PARAMATTR_GROUP_BLOCK: ++ case VALUE_SYMTAB_BLOCK: ++ case METADATA_BLOCK: ++ case METADATA_ATTACHMENT_BLOCK: ++ case TYPE_BLOCK: ++ break; ++ ++ default: ++ FIXME("Unhandled block id %u.\n", block->id); ++ break; ++ } ++ ++ return VKD3D_OK; ++} ++ ++static void sm6_type_table_cleanup(struct sm6_type *types, size_t count) ++{ ++ size_t i; ++ ++ if (!types) ++ return; ++ ++ for (i = 0; i < count; ++i) ++ { ++ switch (types[i].class) ++ { ++ case TYPE_CLASS_STRUCT: ++ vkd3d_free((void *)types[i].u.struc->name); ++ vkd3d_free(types[i].u.struc); ++ break; ++ case TYPE_CLASS_FUNCTION: ++ vkd3d_free(types[i].u.function); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ vkd3d_free(types); ++} ++ ++static void sm6_symtab_cleanup(struct sm6_symbol *symbols, size_t count) ++{ ++ size_t i; ++ ++ for (i = 0; i < count; ++i) ++ vkd3d_free((void *)symbols[i].name); ++ vkd3d_free(symbols); ++} ++ ++static void sm6_block_destroy(struct sm6_block *block) ++{ ++ vkd3d_free(block->instructions); ++ vkd3d_free(block); ++} ++ ++static void sm6_functions_cleanup(struct sm6_function *functions, size_t count) ++{ ++ size_t i, j; ++ ++ for (i = 0; i < count; ++i) ++ { ++ for (j = 0; j < functions[i].block_count; ++j) ++ sm6_block_destroy(functions[i].blocks[j]); ++ } ++ vkd3d_free(functions); ++} ++ ++static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) ++{ ++ struct sm6_parser *sm6 = sm6_parser(parser); ++ ++ dxil_block_destroy(&sm6->root_block); ++ dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); ++ shader_instruction_array_destroy(&parser->instructions); ++ sm6_type_table_cleanup(sm6->types, sm6->type_count); ++ sm6_symtab_cleanup(sm6->global_symbols, sm6->global_symbol_count); ++ sm6_functions_cleanup(sm6->functions, 
sm6->function_count); ++ vkd3d_free(sm6->values); ++ free_shader_desc(&parser->shader_desc); ++ vkd3d_free(sm6); ++} ++ ++static const struct vkd3d_shader_parser_ops sm6_parser_ops = ++{ ++ .parser_destroy = sm6_parser_destroy, ++}; ++ ++static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, ++ const char *source_name, struct vkd3d_shader_message_context *message_context) ++{ ++ const struct shader_signature *output_signature = &sm6->p.shader_desc.output_signature; ++ const struct vkd3d_shader_location location = {.source_name = source_name}; ++ uint32_t version_token, dxil_version, token_count, magic; ++ unsigned int chunk_offset, chunk_size; ++ size_t count, length, function_count; ++ enum bitcode_block_abbreviation abbr; ++ struct vkd3d_shader_version version; ++ struct dxil_block *block; ++ enum vkd3d_result ret; ++ unsigned int i; ++ ++ count = byte_code_size / sizeof(*byte_code); ++ if (count < 6) ++ { ++ WARN("Invalid data size %zu.\n", byte_code_size); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE, ++ "DXIL chunk size %zu is smaller than the DXIL header size.", byte_code_size); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ version_token = byte_code[0]; ++ TRACE("Compiler version: 0x%08x.\n", version_token); ++ token_count = byte_code[1]; ++ TRACE("Token count: %u.\n", token_count); ++ ++ if (token_count < 6 || count < token_count) ++ { ++ WARN("Invalid token count %u (word count %zu).\n", token_count, count); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, ++ "DXIL chunk token count %#x is invalid (word count %zu).", token_count, count); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (byte_code[2] != TAG_DXIL) ++ WARN("Unknown magic number 0x%08x.\n", byte_code[2]); ++ ++ dxil_version = byte_code[3]; ++ if (dxil_version > 0x102) ++ WARN("Unknown DXIL version: 0x%08x.\n", dxil_version); ++ else ++ 
TRACE("DXIL version: 0x%08x.\n", dxil_version); ++ ++ chunk_offset = byte_code[4]; ++ if (chunk_offset < 16 || chunk_offset >= byte_code_size) ++ { ++ WARN("Invalid bitcode chunk offset %#x (data size %zu).\n", chunk_offset, byte_code_size); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET, ++ "DXIL bitcode chunk has invalid offset %#x (data size %#zx).", chunk_offset, byte_code_size); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ chunk_size = byte_code[5]; ++ if (chunk_size > byte_code_size - chunk_offset) ++ { ++ WARN("Invalid bitcode chunk size %#x (data size %zu, chunk offset %#x).\n", ++ chunk_size, byte_code_size, chunk_offset); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, ++ "DXIL bitcode chunk has invalid size %#x (data size %#zx, chunk offset %#x).", ++ chunk_size, byte_code_size, chunk_offset); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ sm6->start = (const uint32_t *)((const char*)&byte_code[2] + chunk_offset); ++ if ((magic = sm6->start[0]) != BITCODE_MAGIC) ++ { ++ WARN("Unknown magic number 0x%08x.\n", magic); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER, ++ "DXIL bitcode chunk magic number 0x%08x is not the expected 0x%08x.", magic, BITCODE_MAGIC); ++ } ++ ++ sm6->end = &sm6->start[(chunk_size + sizeof(*sm6->start) - 1) / sizeof(*sm6->start)]; ++ ++ if ((version.type = version_token >> 16) >= VKD3D_SHADER_TYPE_COUNT) ++ { ++ FIXME("Unknown shader type %#x.\n", version.type); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE, ++ "Unknown shader type %#x.", version.type); ++ } ++ ++ version.major = VKD3D_SM6_VERSION_MAJOR(version_token); ++ version.minor = VKD3D_SM6_VERSION_MINOR(version_token); ++ ++ if ((abbr = sm6->start[1] & 3) != ENTER_SUBBLOCK) ++ { ++ WARN("Initial block abbreviation %u is not ENTER_SUBBLOCK.\n", abbr); ++ vkd3d_shader_error(message_context, &location, 
VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, ++ "DXIL bitcode chunk has invalid initial block abbreviation %u.", abbr); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ /* Estimate instruction count to avoid reallocation in most shaders. */ ++ count = max(token_count, 400) - 400; ++ vkd3d_shader_parser_init(&sm6->p, message_context, source_name, &version, &sm6_parser_ops, ++ (count + (count >> 2)) / 2u + 10); ++ sm6->ptr = &sm6->start[1]; ++ sm6->bitpos = 2; ++ ++ block = &sm6->root_block; ++ if ((ret = dxil_block_init(block, NULL, sm6)) < 0) ++ { ++ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory parsing DXIL bitcode chunk."); ++ else if (ret == VKD3D_ERROR_INVALID_SHADER) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, ++ "DXIL bitcode chunk has invalid bitcode."); ++ else ++ vkd3d_unreachable(); ++ return ret; ++ } ++ ++ dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); ++ sm6->abbrevs = NULL; ++ sm6->abbrev_count = 0; ++ ++ length = sm6->ptr - sm6->start - block->start; ++ if (length != block->length) ++ { ++ WARN("Invalid block length %zu; expected %u.\n", length, block->length); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH, ++ "Root block ends with length %zu but indicated length is %u.", length, block->length); ++ } ++ if (sm6->ptr != sm6->end) ++ { ++ size_t expected_length = sm6->end - sm6->start; ++ length = sm6->ptr - sm6->start; ++ WARN("Invalid module length %zu; expected %zu.\n", length, expected_length); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH, ++ "Module ends with length %zu but indicated length is %zu.", length, expected_length); ++ } ++ ++ if ((ret = sm6_parser_type_table_init(sm6)) < 0) ++ { ++ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) ++ vkd3d_shader_error(message_context, &location, 
VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory parsing DXIL type table."); ++ else if (ret == VKD3D_ERROR_INVALID_SHADER) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE, ++ "DXIL type table is invalid."); ++ else ++ vkd3d_unreachable(); ++ return ret; ++ } ++ ++ if ((ret = sm6_parser_symtab_init(sm6)) < 0) ++ { ++ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory parsing DXIL value symbol table."); ++ else if (ret == VKD3D_ERROR_INVALID_SHADER) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB, ++ "DXIL value symbol table is invalid."); ++ else ++ vkd3d_unreachable(); ++ return ret; ++ } ++ ++ if (!(sm6->output_params = shader_parser_get_dst_params(&sm6->p, output_signature->element_count))) ++ { ++ ERR("Failed to allocate output parameters.\n"); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory allocating output parameters."); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ function_count = dxil_block_compute_function_count(&sm6->root_block); ++ if (!(sm6->functions = vkd3d_calloc(function_count, sizeof(*sm6->functions)))) ++ { ++ ERR("Failed to allocate function array.\n"); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory allocating DXIL function array."); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if (sm6_parser_compute_max_value_count(sm6, &sm6->root_block, 0) == SIZE_MAX) ++ { ++ WARN("Value array count overflowed.\n"); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, ++ "Overflow occurred in the DXIL module value count."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ if (!(sm6->values = vkd3d_calloc(sm6->value_capacity, sizeof(*sm6->values)))) ++ { ++ ERR("Failed to allocate value array.\n"); ++ 
vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory allocating DXIL value array."); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ if ((ret = sm6_parser_globals_init(sm6)) < 0) ++ { ++ WARN("Failed to load global declarations.\n"); ++ return ret; ++ } ++ ++ sm6_parser_init_output_signature(sm6, output_signature); ++ ++ if ((ret = sm6_parser_module_init(sm6, &sm6->root_block, 0)) < 0) ++ { ++ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory parsing DXIL module."); ++ else if (ret == VKD3D_ERROR_INVALID_SHADER) ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, ++ "DXIL module is invalid."); ++ return ret; ++ } ++ ++ if (!sm6_parser_require_space(sm6, output_signature->element_count)) ++ { ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory emitting shader signature declarations."); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ sm6_parser_emit_output_signature(sm6, output_signature); ++ ++ for (i = 0; i < sm6->function_count; ++i) ++ { ++ if (!sm6_block_emit_instructions(sm6->functions[i].blocks[0], sm6)) ++ { ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, ++ "Out of memory emitting shader instructions."); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ } ++ ++ dxil_block_destroy(&sm6->root_block); ++ ++ return VKD3D_OK; ++} ++ ++int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) ++{ ++ struct vkd3d_shader_desc *shader_desc; ++ uint32_t *byte_code = NULL; ++ struct sm6_parser *sm6; ++ int ret; ++ ++ ERR("Creating a DXIL parser. 
This is unsupported; you get to keep all the pieces if it breaks.\n"); ++ ++ if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6)))) ++ { ++ ERR("Failed to allocate parser.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ shader_desc = &sm6->p.shader_desc; ++ shader_desc->is_dxil = true; ++ if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, ++ shader_desc)) < 0) ++ { ++ WARN("Failed to extract shader, vkd3d result %d.\n", ret); ++ vkd3d_free(sm6); ++ return ret; ++ } ++ ++ sm6->p.shader_desc = *shader_desc; ++ shader_desc = &sm6->p.shader_desc; ++ ++ if (((uintptr_t)shader_desc->byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) ++ { ++ /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC ++ * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ ++ if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) ++ ERR("Failed to allocate aligned chunk. Unaligned access will occur.\n"); ++ else ++ memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); ++ } ++ ++ ret = sm6_parser_init(sm6, byte_code ? 
byte_code : shader_desc->byte_code, shader_desc->byte_code_size, ++ compile_info->source_name, message_context); ++ vkd3d_free(byte_code); ++ ++ if (ret < 0) ++ { ++ WARN("Failed to initialise shader parser.\n"); ++ sm6_parser_destroy(&sm6->p); ++ return ret; ++ } ++ ++ *parser = &sm6->p; ++ ++ return ret; ++} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 64d6e87065b..ba5bcfbfaf0 100644 +index 64d6e87065b..5fe9047bf25 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -112,8 +112,12 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) +@@ -72,6 +72,27 @@ void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, c + ctx->result = VKD3D_ERROR_NOT_IMPLEMENTED; + } + ++char *hlsl_sprintf_alloc(struct hlsl_ctx *ctx, const char *fmt, ...) ++{ ++ struct vkd3d_string_buffer *string; ++ va_list args; ++ char *ret; ++ ++ if (!(string = hlsl_get_string_buffer(ctx))) ++ return NULL; ++ va_start(args, fmt); ++ if (vkd3d_string_buffer_vprintf(string, fmt, args) < 0) ++ { ++ va_end(args); ++ hlsl_release_string_buffer(ctx, string); ++ return NULL; ++ } ++ va_end(args); ++ ret = hlsl_strdup(ctx, string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ return ret; ++} ++ + bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var) + { + struct hlsl_scope *scope = ctx->cur_scope; +@@ -112,8 +133,12 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) void hlsl_free_var(struct hlsl_ir_var *decl) { @@ -5518,7 +9768,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 vkd3d_free(decl); } -@@ -126,7 +130,7 @@ bool hlsl_type_is_row_major(const struct hlsl_type *type) +@@ -126,7 +151,7 @@ bool hlsl_type_is_row_major(const struct hlsl_type *type) unsigned int hlsl_type_minor_size(const struct hlsl_type *type) { @@ -5527,7 +9777,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 return type->dimx; else return 
type->dimy; -@@ -134,7 +138,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type) +@@ -134,7 +159,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type) unsigned int hlsl_type_major_size(const struct hlsl_type *type) { @@ -5536,7 +9786,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 return type->dimy; else return type->dimx; -@@ -142,7 +146,7 @@ unsigned int hlsl_type_major_size(const struct hlsl_type *type) +@@ -142,7 +167,7 @@ unsigned int hlsl_type_major_size(const struct hlsl_type *type) unsigned int hlsl_type_element_count(const struct hlsl_type *type) { @@ -5545,7 +9795,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { case HLSL_CLASS_VECTOR: return type->dimx; -@@ -157,16 +161,26 @@ unsigned int hlsl_type_element_count(const struct hlsl_type *type) +@@ -157,16 +182,26 @@ unsigned int hlsl_type_element_count(const struct hlsl_type *type) } } @@ -5576,7 +9826,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { switch (type->base_type) { -@@ -183,10 +197,13 @@ bool hlsl_type_is_resource(const struct hlsl_type *type) +@@ -183,10 +218,13 @@ bool hlsl_type_is_resource(const struct hlsl_type *type) enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) { @@ -5592,7 +9842,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { switch (type->base_type) { -@@ -203,8 +220,6 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) +@@ -203,8 +241,6 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) vkd3d_unreachable(); } } @@ -5601,7 +9851,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 vkd3d_unreachable(); } -@@ -216,7 +231,8 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int +@@ -216,7 +252,8 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int * (b) the type would cross a vec4 boundary; i.e. a vec3 and a * vec1 can be packed together, but not a vec3 and a vec2. 
*/ @@ -5611,7 +9861,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 return align(offset, 4); return offset; } -@@ -229,7 +245,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type +@@ -229,7 +266,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type for (k = 0; k <= HLSL_REGSET_LAST; ++k) type->reg_size[k] = 0; @@ -5620,7 +9870,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: -@@ -278,7 +294,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type +@@ -278,7 +315,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type type->reg_size[k] += field->type->reg_size[k]; } @@ -5629,7 +9879,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 } break; } -@@ -317,7 +333,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e +@@ -317,7 +354,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e vkd3d_free(type); return NULL; } @@ -5638,7 +9888,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 type->base_type = base_type; type->dimx = dimx; type->dimy = dimy; -@@ -330,7 +346,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e +@@ -330,7 +367,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e static bool type_is_single_component(const struct hlsl_type *type) { @@ -5647,7 +9897,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 } /* Given a type and a component index, this function moves one step through the path required to -@@ -349,7 +365,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, +@@ -349,7 +386,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, assert(!type_is_single_component(type)); assert(index < hlsl_type_component_count(type)); @@ -5656,7 +9906,59 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { case HLSL_CLASS_VECTOR: assert(index < type->dimx); 
-@@ -427,7 +443,7 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl +@@ -414,6 +451,51 @@ struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl + return type; + } + ++unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, ++ enum hlsl_regset regset, unsigned int index) ++{ ++ struct hlsl_type *next_type; ++ unsigned int offset = 0; ++ unsigned int idx; ++ ++ while (!type_is_single_component(type)) ++ { ++ next_type = type; ++ idx = traverse_path_from_component_index(ctx, &next_type, &index); ++ ++ switch (type->class) ++ { ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ if (regset == HLSL_REGSET_NUMERIC) ++ offset += idx; ++ break; ++ ++ case HLSL_CLASS_STRUCT: ++ offset += type->e.record.fields[idx].reg_offset[regset]; ++ break; ++ ++ case HLSL_CLASS_ARRAY: ++ if (regset == HLSL_REGSET_NUMERIC) ++ offset += idx * align(type->e.array.type->reg_size[regset], 4); ++ else ++ offset += idx * type->e.array.type->reg_size[regset]; ++ break; ++ ++ case HLSL_CLASS_OBJECT: ++ assert(idx == 0); ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ type = next_type; ++ } ++ ++ return offset; ++} ++ + static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, + unsigned int path_len) + { +@@ -427,7 +509,7 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl return true; } @@ -5665,7 +9967,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { deref->var = NULL; deref->path_len = 0; -@@ -437,6 +453,71 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl +@@ -437,13 +519,80 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl return true; } @@ -5737,7 +10039,17 @@ index 64d6e87065b..ba5bcfbfaf0 100644 struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) { struct hlsl_type *type; -@@ -459,9 
+540,9 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl + unsigned int i; + + assert(deref); +- assert(!deref->offset.node); ++ ++ if (deref->offset.node) ++ return deref->data_type; + + type = deref->var->data_type; + for (i = 0; i < deref->path_len; ++i) +@@ -459,9 +608,9 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl { unsigned int path_len, path_index, deref_path_len, i; struct hlsl_type *path_type; @@ -5749,7 +10061,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 path_len = 0; path_type = hlsl_deref_get_type(ctx, prefix); -@@ -487,12 +568,12 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl +@@ -487,12 +636,12 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl if (!(c = hlsl_new_uint_constant(ctx, next_index, loc))) { @@ -5765,7 +10077,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 } assert(deref_path_len == deref->path_len); -@@ -505,7 +586,7 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co +@@ -505,7 +654,7 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co { assert(idx); @@ -5774,7 +10086,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { case HLSL_CLASS_VECTOR: return hlsl_get_scalar_type(ctx, type->base_type); -@@ -523,8 +604,8 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co +@@ -523,8 +672,8 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co { struct hlsl_ir_constant *c = hlsl_ir_constant(idx); @@ -5785,7 +10097,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 } default: -@@ -539,7 +620,7 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba +@@ -539,12 +688,13 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; @@ -5794,7 +10106,13 @@ index 64d6e87065b..ba5bcfbfaf0 100644 
type->modifiers = basic_type->modifiers; type->e.array.elements_count = array_size; type->e.array.type = basic_type; -@@ -559,7 +640,7 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, + type->dimx = basic_type->dimx; + type->dimy = basic_type->dimy; ++ type->sampler_dim = basic_type->sampler_dim; + hlsl_type_calculate_reg_size(ctx, type); + + list_add_tail(&ctx->types, &type->entry); +@@ -559,7 +709,7 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; @@ -5803,7 +10121,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 type->base_type = HLSL_TYPE_VOID; type->name = name; type->dimy = 1; -@@ -579,7 +660,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ +@@ -579,7 +729,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; @@ -5812,7 +10130,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 type->base_type = HLSL_TYPE_TEXTURE; type->dimx = 4; type->dimy = 1; -@@ -597,7 +678,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim +@@ -597,7 +747,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim if (!(type = vkd3d_calloc(1, sizeof(*type)))) return NULL; @@ -5821,7 +10139,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 type->base_type = HLSL_TYPE_UAV; type->dimx = format->dimx; type->dimy = 1; -@@ -614,6 +695,8 @@ static const char * get_case_insensitive_typename(const char *name) +@@ -614,6 +764,8 @@ static const char * get_case_insensitive_typename(const char *name) { "dword", "float", @@ -5830,7 +10148,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 }; unsigned int i; -@@ -677,7 +760,7 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha +@@ -677,7 +829,7 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha unsigned int 
hlsl_type_component_count(const struct hlsl_type *type) { @@ -5839,7 +10157,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: -@@ -709,7 +792,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 +@@ -709,7 +861,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 if (t1 == t2) return true; @@ -5848,7 +10166,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 return false; if (t1->base_type != t2->base_type) return false; -@@ -729,7 +812,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 +@@ -729,7 +881,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 return false; if (t1->dimy != t2->dimy) return false; @@ -5857,7 +10175,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { size_t i; -@@ -748,7 +831,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 +@@ -748,7 +900,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 return false; } } @@ -5866,7 +10184,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 return t1->e.array.elements_count == t2->e.array.elements_count && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); -@@ -772,7 +855,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, +@@ -772,7 +924,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, return NULL; } } @@ -5875,7 +10193,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 type->base_type = old->base_type; type->dimx = old->dimx; type->dimy = old->dimy; -@@ -781,7 +864,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, +@@ -781,7 +933,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, type->modifiers |= default_majority; type->sampler_dim = old->sampler_dim; type->is_minimum_precision = old->is_minimum_precision; @@ -5884,7 +10202,7 @@ index 
64d6e87065b..ba5bcfbfaf0 100644 { case HLSL_CLASS_ARRAY: if (!(type->e.array.type = hlsl_type_clone(ctx, old->e.array.type, default_majority, modifiers))) -@@ -799,7 +882,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, +@@ -799,7 +951,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, type->e.record.field_count = field_count; @@ -5893,7 +10211,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { vkd3d_free((void *)type->name); vkd3d_free(type); -@@ -848,40 +931,58 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type) +@@ -848,40 +1000,58 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type) return true; } @@ -5958,16 +10276,47 @@ index 64d6e87065b..ba5bcfbfaf0 100644 return var; } -@@ -901,7 +1002,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem - hlsl_release_string_buffer(ctx, string); +@@ -890,27 +1060,37 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem + { + struct vkd3d_string_buffer *string; + struct hlsl_ir_var *var; +- static LONG counter; +- const char *name; + + if (!(string = hlsl_get_string_buffer(ctx))) return NULL; - } +- vkd3d_string_buffer_printf(string, "<%s-%u>", template, InterlockedIncrement(&counter)); +- if (!(name = hlsl_strdup(ctx, string->buffer))) +- { +- hlsl_release_string_buffer(ctx, string); +- return NULL; +- } - var = hlsl_new_var(ctx, name, type, *loc, NULL, 0, NULL); -+ var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); ++ vkd3d_string_buffer_printf(string, "<%s-%u>", template, ctx->internal_name_counter++); ++ var = hlsl_new_synthetic_var_named(ctx, string->buffer, type, loc, true); hlsl_release_string_buffer(ctx, string); ++ return var; ++} ++ ++struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, ++ struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope) ++{ ++ struct hlsl_ir_var 
*var; ++ const char *name_copy; ++ ++ if (!(name_copy = hlsl_strdup(ctx, name))) ++ return NULL; ++ var = hlsl_new_var(ctx, name_copy, type, loc, NULL, 0, NULL); if (var) - list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); -@@ -910,7 +1011,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem +- list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); ++ { ++ if (dummy_scope) ++ list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); ++ else ++ list_add_tail(&ctx->globals->vars, &var->scope_entry); ++ } + return var; + } static bool type_is_single_reg(const struct hlsl_type *type) { @@ -5976,7 +10325,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 } bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other) -@@ -964,7 +1065,7 @@ static void init_node(struct hlsl_ir_node *node, enum hlsl_ir_node_type type, +@@ -964,7 +1144,7 @@ static void init_node(struct hlsl_ir_node *node, enum hlsl_ir_node_type type, list_init(&node->uses); } @@ -5985,7 +10334,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { struct hlsl_deref lhs_deref; -@@ -972,7 +1073,7 @@ struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir +@@ -972,7 +1152,7 @@ struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir return hlsl_new_store_index(ctx, &lhs_deref, NULL, rhs, 0, &rhs->loc); } @@ -5994,7 +10343,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc) { struct hlsl_ir_store *store; -@@ -1001,35 +1102,35 @@ struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hl +@@ -1001,35 +1181,35 @@ struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hl writemask = (1 << rhs->data_type->dimx) - 1; store->writemask = writemask; @@ -6038,7 +10387,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 } struct hlsl_ir_node *hlsl_new_call(struct 
hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, -@@ -1045,66 +1146,54 @@ struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function +@@ -1045,66 +1225,54 @@ struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function return &call->node; } @@ -6066,11 +10415,11 @@ index 64d6e87065b..ba5bcfbfaf0 100644 +struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; -- -- if ((c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), loc))) -- c->value[0].u = b ? ~0u : 0; + struct hlsl_constant_value value; +- if ((c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), loc))) +- c->value[0].u = b ? ~0u : 0; +- - return c; + value.u[0].u = b ? ~0u : 0; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &value, loc); @@ -6098,11 +10447,11 @@ index 64d6e87065b..ba5bcfbfaf0 100644 - struct hlsl_ir_constant *c; - - c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc); -- -- if (c) -- c->value[0].i = n; + struct hlsl_constant_value value; +- if (c) +- c->value[0].i = n; +- - return c; + value.u[0].i = n; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &value, loc); @@ -6115,18 +10464,18 @@ index 64d6e87065b..ba5bcfbfaf0 100644 - struct hlsl_ir_constant *c; - - c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); -+ struct hlsl_constant_value value; - +- - if (c) - c->value[0].u = n; -- ++ struct hlsl_constant_value value; + - return c; + value.u[0].u = n; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &value, loc); } struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, -@@ -1124,11 +1213,11 @@ struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op +@@ -1124,11 +1292,11 @@ struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum 
hlsl_ir_expr_op op } struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, @@ -6140,7 +10489,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 } struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, -@@ -1140,17 +1229,21 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp +@@ -1140,17 +1308,21 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } @@ -6167,7 +10516,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 } struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, -@@ -1183,23 +1276,36 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl +@@ -1183,23 +1355,36 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl return load; } @@ -6208,7 +10557,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 if (!(load = hlsl_alloc(ctx, sizeof(*load)))) return NULL; -@@ -1213,14 +1319,14 @@ struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_b +@@ -1213,14 +1398,14 @@ struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_b vkd3d_free(load); return NULL; } @@ -6227,7 +10576,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_resource_load *load; -@@ -1229,24 +1335,37 @@ struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, +@@ -1229,24 +1414,37 @@ struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, return NULL; init_node(&load->node, HLSL_IR_RESOURCE_LOAD, params->format, loc); load->load_type = params->type; @@ -6272,7 +10621,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc) { struct hlsl_ir_resource_store *store; -@@ 
-1257,10 +1376,10 @@ struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, con +@@ -1257,10 +1455,10 @@ struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, con hlsl_copy_deref(ctx, &store->resource, resource); hlsl_src_from_node(&store->coords, coords); hlsl_src_from_node(&store->value, value); @@ -6285,7 +10634,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) { struct hlsl_ir_swizzle *swizzle; -@@ -1275,29 +1394,66 @@ struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned +@@ -1275,29 +1473,67 @@ struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); hlsl_src_from_node(&swizzle->val, val); swizzle->swizzle = s; @@ -6329,7 +10678,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 -struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc) +struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, -+ const struct vkd3d_shader_location *loc) ++ struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc) { struct hlsl_ir_jump *jump; @@ -6339,6 +10688,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 + init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); jump->type = type; - return jump; ++ hlsl_src_from_node(&jump->condition, condition); + return &jump->node; } @@ -6360,7 +10710,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 } struct clone_instr_map -@@ -1319,11 +1475,13 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, +@@ -1319,20 +1555,22 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_ir_node *src; struct hlsl_ir_node *dst; @@ -6374,8 +10724,10 @@ index 64d6e87065b..ba5bcfbfaf0 100644 + hlsl_block_cleanup(dst_block); return false; } - list_add_tail(&dst_block->instrs, &dst->entry); -@@ -1332,7 
+1490,7 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, +- list_add_tail(&dst_block->instrs, &dst->entry); ++ hlsl_block_add_instr(dst_block, dst); + + if (!list_empty(&src->uses)) { if (!vkd3d_array_reserve((void **)&map->instrs, &map->capacity, map->count + 1, sizeof(*map->instrs))) { @@ -6384,7 +10736,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 return false; } -@@ -1390,12 +1548,7 @@ static struct hlsl_ir_node *clone_call(struct hlsl_ctx *ctx, struct hlsl_ir_call +@@ -1390,12 +1628,7 @@ static struct hlsl_ir_node *clone_call(struct hlsl_ctx *ctx, struct hlsl_ir_call static struct hlsl_ir_node *clone_constant(struct hlsl_ctx *ctx, struct hlsl_ir_constant *src) { @@ -6398,7 +10750,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 } static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_expr *src) -@@ -1411,27 +1564,30 @@ static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_ +@@ -1411,27 +1644,30 @@ static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_if *src) { @@ -6408,11 +10760,11 @@ index 64d6e87065b..ba5bcfbfaf0 100644 - if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), src->node.loc))) + if (!clone_block(ctx, &then_block, &src->then_block, map)) -+ return NULL; + return NULL; + if (!clone_block(ctx, &else_block, &src->else_block, map)) + { + hlsl_block_cleanup(&then_block); - return NULL; ++ return NULL; + } - if (!clone_block(ctx, &dst->then_instrs, &src->then_instrs, map) @@ -6429,18 +10781,19 @@ index 64d6e87065b..ba5bcfbfaf0 100644 + return dst; } - static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) +-static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) ++static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct clone_instr_map *map, 
struct hlsl_ir_jump *src) { - struct hlsl_ir_jump *dst; - - if (!(dst = hlsl_new_jump(ctx, src->type, src->node.loc))) - return NULL; - return &dst->node; -+ return hlsl_new_jump(ctx, src->type, &src->node.loc); ++ return hlsl_new_jump(ctx, src->type, map_instr(map, src->condition.node), &src->node.loc); } static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) -@@ -1452,16 +1608,18 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ +@@ -1452,16 +1688,18 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) { @@ -6464,7 +10817,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 } static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, -@@ -1486,7 +1644,12 @@ static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, +@@ -1486,7 +1724,12 @@ static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, } clone_src(map, &dst->coords, &src->coords); clone_src(map, &dst->lod, &src->lod); @@ -6477,7 +10830,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 return &dst->node; } -@@ -1529,12 +1692,19 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr +@@ -1529,12 +1772,19 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_swizzle *src) { @@ -6501,7 +10854,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 } static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, -@@ -1554,6 +1724,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, +@@ -1554,8 +1804,11 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_IF: return clone_if(ctx, map, hlsl_ir_if(instr)); @@ -6509,9 +10862,12 @@ index 64d6e87065b..ba5bcfbfaf0 100644 + 
return clone_index(ctx, map, hlsl_ir_index(instr)); + case HLSL_IR_JUMP: - return clone_jump(ctx, hlsl_ir_jump(instr)); +- return clone_jump(ctx, hlsl_ir_jump(instr)); ++ return clone_jump(ctx, map, hlsl_ir_jump(instr)); -@@ -1593,13 +1766,12 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, + case HLSL_IR_LOAD: + return clone_load(ctx, map, hlsl_ir_load(instr)); +@@ -1593,13 +1846,12 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc) { @@ -6527,7 +10883,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 decl->return_type = return_type; decl->parameters = *parameters; decl->loc = *loc; -@@ -1620,17 +1792,17 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, +@@ -1620,17 +1872,17 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, if (!(constant = hlsl_new_bool_constant(ctx, false, loc))) return decl; @@ -6549,7 +10905,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { struct hlsl_buffer *buffer; -@@ -1640,7 +1812,7 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type +@@ -1640,7 +1892,7 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type buffer->name = name; if (reservation) buffer->reservation = *reservation; @@ -6558,7 +10914,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 list_add_tail(&ctx->buffers, &buffer->entry); return buffer; } -@@ -1698,10 +1870,10 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls +@@ -1698,10 +1950,10 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls { int r; @@ -6572,7 +10928,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 return r; } if ((r = vkd3d_u32_compare(t1->base_type, t2->base_type))) -@@ -1718,7 +1890,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const 
struct hls +@@ -1718,7 +1970,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls return r; if ((r = vkd3d_u32_compare(t1->dimy, t2->dimy))) return r; @@ -6581,7 +10937,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { size_t i; -@@ -1738,7 +1910,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls +@@ -1738,7 +1990,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls } return 0; } @@ -6590,7 +10946,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { if ((r = vkd3d_u32_compare(t1->e.array.elements_count, t2->e.array.elements_count))) return r; -@@ -1768,7 +1940,7 @@ static int compare_function_decl_rb(const void *key, const struct rb_entry *entr +@@ -1768,7 +2020,7 @@ static int compare_function_decl_rb(const void *key, const struct rb_entry *entr struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) { @@ -6599,7 +10955,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 static const char *const base_types[] = { -@@ -1789,7 +1961,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -1789,7 +2041,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru return string; } @@ -6608,7 +10964,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { case HLSL_CLASS_SCALAR: assert(type->base_type < ARRAY_SIZE(base_types)); -@@ -1808,10 +1980,9 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -1808,10 +2060,9 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru case HLSL_CLASS_ARRAY: { @@ -6620,7 +10976,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 ; if ((inner_string = hlsl_type_to_string(ctx, t))) -@@ -1820,7 +1991,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -1820,7 +2071,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru hlsl_release_string_buffer(ctx, 
inner_string); } @@ -6629,7 +10985,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { if (t->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) vkd3d_string_buffer_printf(string, "[]"); -@@ -1860,13 +2031,26 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -1860,13 +2111,26 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru assert(type->sampler_dim < ARRAY_SIZE(dimensions)); assert(type->e.resource_format->base_type < ARRAY_SIZE(base_types)); @@ -6660,15 +11016,99 @@ index 64d6e87065b..ba5bcfbfaf0 100644 return string; default: -@@ -1943,6 +2127,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) - "HLSL_IR_CONSTANT", - "HLSL_IR_EXPR", - "HLSL_IR_IF", -+ "HLSL_IR_INDEX", - "HLSL_IR_LOAD", - "HLSL_IR_LOOP", - "HLSL_IR_JUMP", -@@ -2107,7 +2292,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl +@@ -1881,6 +2145,31 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + } + } + ++struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, ++ unsigned int index) ++{ ++ struct hlsl_type *type = var->data_type, *current_type; ++ struct vkd3d_string_buffer *buffer; ++ unsigned int element_index; ++ ++ if (!(buffer = hlsl_get_string_buffer(ctx))) ++ return NULL; ++ ++ vkd3d_string_buffer_printf(buffer, "%s", var->name); ++ ++ while (!type_is_single_component(type)) ++ { ++ current_type = type; ++ element_index = traverse_path_from_component_index(ctx, &type, &index); ++ if (current_type->class == HLSL_CLASS_STRUCT) ++ vkd3d_string_buffer_printf(buffer, ".%s", current_type->e.record.fields[element_index].name); ++ else ++ vkd3d_string_buffer_printf(buffer, "[%u]", element_index); ++ } ++ ++ return buffer; ++} ++ + const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) + { + struct vkd3d_string_buffer *string; +@@ -1939,17 +2228,18 @@ const char 
*hlsl_node_type_to_string(enum hlsl_ir_node_type type) + { + static const char * const names[] = + { +- "HLSL_IR_CALL", +- "HLSL_IR_CONSTANT", +- "HLSL_IR_EXPR", +- "HLSL_IR_IF", +- "HLSL_IR_LOAD", +- "HLSL_IR_LOOP", +- "HLSL_IR_JUMP", +- "HLSL_IR_RESOURCE_LOAD", +- "HLSL_IR_RESOURCE_STORE", +- "HLSL_IR_STORE", +- "HLSL_IR_SWIZZLE", ++ [HLSL_IR_CALL ] = "HLSL_IR_CALL", ++ [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", ++ [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", ++ [HLSL_IR_IF ] = "HLSL_IR_IF", ++ [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", ++ [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", ++ [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", ++ [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", ++ [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", ++ [HLSL_IR_RESOURCE_STORE] = "HLSL_IR_RESOURCE_STORE", ++ [HLSL_IR_STORE ] = "HLSL_IR_STORE", ++ [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", + }; + + if (type >= ARRAY_SIZE(names)) +@@ -1961,10 +2251,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) + { + static const char * const names[] = + { +- "HLSL_IR_JUMP_BREAK", +- "HLSL_IR_JUMP_CONTINUE", +- "HLSL_IR_JUMP_DISCARD", +- "HLSL_IR_JUMP_RETURN", ++ [HLSL_IR_JUMP_BREAK] = "HLSL_IR_JUMP_BREAK", ++ [HLSL_IR_JUMP_CONTINUE] = "HLSL_IR_JUMP_CONTINUE", ++ [HLSL_IR_JUMP_DISCARD_NEG] = "HLSL_IR_JUMP_DISCARD_NEG", ++ [HLSL_IR_JUMP_DISCARD_NZ] = "HLSL_IR_JUMP_DISCARD_NZ", ++ [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", + }; + + assert(type < ARRAY_SIZE(names)); +@@ -1973,11 +2264,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) + + static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr); + +-static void dump_instr_list(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct list *list) ++static void dump_block(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_block *block) + { + struct hlsl_ir_node *instr; + +- LIST_FOR_EACH_ENTRY(instr, list, struct hlsl_ir_node, entry) ++ LIST_FOR_EACH_ENTRY(instr, 
&block->instrs, struct hlsl_ir_node, entry) + { + dump_instr(ctx, buffer, instr); + vkd3d_string_buffer_printf(buffer, "\n"); +@@ -2107,7 +2398,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl vkd3d_string_buffer_printf(buffer, "{"); for (x = 0; x < type->dimx; ++x) { @@ -6677,7 +11117,19 @@ index 64d6e87065b..ba5bcfbfaf0 100644 switch (type->base_type) { -@@ -2168,6 +2353,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) +@@ -2152,7 +2443,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP1_COS] = "cos", + [HLSL_OP1_COS_REDUCED] = "cos_reduced", + [HLSL_OP1_DSX] = "dsx", ++ [HLSL_OP1_DSX_COARSE] = "dsx_coarse", ++ [HLSL_OP1_DSX_FINE] = "dsx_fine", + [HLSL_OP1_DSY] = "dsy", ++ [HLSL_OP1_DSY_COARSE] = "dsy_coarse", ++ [HLSL_OP1_DSY_FINE] = "dsy_fine", + [HLSL_OP1_EXP2] = "exp2", + [HLSL_OP1_FRACT] = "fract", + [HLSL_OP1_LOG2] = "log2", +@@ -2168,6 +2463,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_SIN] = "sin", [HLSL_OP1_SIN_REDUCED] = "sin_reduced", [HLSL_OP1_SQRT] = "sqrt", @@ -6685,19 +11137,53 @@ index 64d6e87065b..ba5bcfbfaf0 100644 [HLSL_OP2_ADD] = "+", [HLSL_OP2_BIT_AND] = "&", -@@ -2214,9 +2400,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, +@@ -2190,7 +2486,8 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP2_RSHIFT] = ">>", + + [HLSL_OP3_DP2ADD] = "dp2add", +- [HLSL_OP3_LERP] = "lerp", ++ [HLSL_OP3_MOVC] = "movc", ++ [HLSL_OP3_TERNARY] = "ternary", + }; + + return op_names[op]; +@@ -2214,9 +2511,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, vkd3d_string_buffer_printf(buffer, "if ("); dump_src(buffer, &if_node->condition); vkd3d_string_buffer_printf(buffer, ") {\n"); - dump_instr_list(ctx, buffer, &if_node->then_instrs.instrs); -+ dump_instr_list(ctx, buffer, &if_node->then_block.instrs); ++ dump_block(ctx, buffer, &if_node->then_block); 
vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); - dump_instr_list(ctx, buffer, &if_node->else_instrs.instrs); -+ dump_instr_list(ctx, buffer, &if_node->else_block.instrs); ++ dump_block(ctx, buffer, &if_node->else_block); vkd3d_string_buffer_printf(buffer, " %10s }", ""); } -@@ -2255,7 +2441,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru +@@ -2232,8 +2529,12 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i + vkd3d_string_buffer_printf(buffer, "continue"); + break; + +- case HLSL_IR_JUMP_DISCARD: +- vkd3d_string_buffer_printf(buffer, "discard"); ++ case HLSL_IR_JUMP_DISCARD_NEG: ++ vkd3d_string_buffer_printf(buffer, "discard_neg"); ++ break; ++ ++ case HLSL_IR_JUMP_DISCARD_NZ: ++ vkd3d_string_buffer_printf(buffer, "discard_nz"); + break; + + case HLSL_IR_JUMP_RETURN: +@@ -2245,7 +2546,7 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i + static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop) + { + vkd3d_string_buffer_printf(buffer, "for (;;) {\n"); +- dump_instr_list(ctx, buffer, &loop->body.instrs); ++ dump_block(ctx, buffer, &loop->body); + vkd3d_string_buffer_printf(buffer, " %10s }", ""); + } + +@@ -2255,11 +2556,17 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru { [HLSL_RESOURCE_LOAD] = "load_resource", [HLSL_RESOURCE_SAMPLE] = "sample", @@ -6709,10 +11195,23 @@ index 64d6e87065b..ba5bcfbfaf0 100644 [HLSL_RESOURCE_GATHER_RED] = "gather_red", [HLSL_RESOURCE_GATHER_GREEN] = "gather_green", [HLSL_RESOURCE_GATHER_BLUE] = "gather_blue", -@@ -2269,6 +2459,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru + [HLSL_RESOURCE_GATHER_ALPHA] = "gather_alpha", ++ [HLSL_RESOURCE_SAMPLE_INFO] = "sample_info", ++ [HLSL_RESOURCE_RESINFO] = "resinfo", + }; + + assert(load->load_type < ARRAY_SIZE(type_names)); +@@ -2267,8 +2574,16 
@@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru + dump_deref(buffer, &load->resource); + vkd3d_string_buffer_printf(buffer, ", sampler = "); dump_deref(buffer, &load->sampler); - vkd3d_string_buffer_printf(buffer, ", coords = "); - dump_src(buffer, &load->coords); +- vkd3d_string_buffer_printf(buffer, ", coords = "); +- dump_src(buffer, &load->coords); ++ if (load->coords.node) ++ { ++ vkd3d_string_buffer_printf(buffer, ", coords = "); ++ dump_src(buffer, &load->coords); ++ } + if (load->sample_index.node) + { + vkd3d_string_buffer_printf(buffer, ", sample index = "); @@ -6721,7 +11220,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 if (load->texel_offset.node) { vkd3d_string_buffer_printf(buffer, ", offset = "); -@@ -2279,6 +2474,21 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru +@@ -2279,6 +2594,21 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru vkd3d_string_buffer_printf(buffer, ", lod = "); dump_src(buffer, &load->lod); } @@ -6743,7 +11242,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 vkd3d_string_buffer_printf(buffer, ")"); } -@@ -2321,6 +2531,14 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls +@@ -2321,6 +2651,14 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls } } @@ -6758,7 +11257,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr) { if (instr->index) -@@ -2348,6 +2566,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, +@@ -2348,6 +2686,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, dump_ir_if(ctx, buffer, hlsl_ir_if(instr)); break; @@ -6769,7 +11268,16 @@ index 64d6e87065b..ba5bcfbfaf0 100644 case HLSL_IR_JUMP: dump_ir_jump(buffer, hlsl_ir_jump(instr)); break; -@@ -2421,7 +2643,7 @@ void hlsl_free_type(struct 
hlsl_type *type) +@@ -2392,7 +2734,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl + vkd3d_string_buffer_printf(&buffer, "\n"); + } + if (func->has_body) +- dump_instr_list(ctx, &buffer, &func->body.instrs); ++ dump_block(ctx, &buffer, &func->body); + + vkd3d_string_buffer_trace(&buffer); + vkd3d_string_buffer_cleanup(&buffer); +@@ -2421,7 +2763,7 @@ void hlsl_free_type(struct hlsl_type *type) size_t i; vkd3d_free((void *)type->name); @@ -6778,7 +11286,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 { for (i = 0; i < type->e.record.field_count; ++i) { -@@ -2447,6 +2669,11 @@ void hlsl_free_instr_list(struct list *list) +@@ -2447,6 +2789,11 @@ void hlsl_free_instr_list(struct list *list) hlsl_free_instr(node); } @@ -6790,7 +11298,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 static void free_ir_call(struct hlsl_ir_call *call) { vkd3d_free(call); -@@ -2468,8 +2695,8 @@ static void free_ir_expr(struct hlsl_ir_expr *expr) +@@ -2468,14 +2815,15 @@ static void free_ir_expr(struct hlsl_ir_expr *expr) static void free_ir_if(struct hlsl_ir_if *if_node) { @@ -6801,7 +11309,14 @@ index 64d6e87065b..ba5bcfbfaf0 100644 hlsl_src_remove(&if_node->condition); vkd3d_free(if_node); } -@@ -2487,7 +2714,7 @@ static void free_ir_load(struct hlsl_ir_load *load) + + static void free_ir_jump(struct hlsl_ir_jump *jump) + { ++ hlsl_src_remove(&jump->condition); + vkd3d_free(jump); + } + +@@ -2487,7 +2835,7 @@ static void free_ir_load(struct hlsl_ir_load *load) static void free_ir_loop(struct hlsl_ir_loop *loop) { @@ -6810,7 +11325,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 vkd3d_free(loop); } -@@ -2497,7 +2724,11 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) +@@ -2497,7 +2845,11 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) hlsl_cleanup_deref(&load->resource); hlsl_src_remove(&load->coords); hlsl_src_remove(&load->lod); @@ -6822,7 +11337,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 vkd3d_free(load); } -@@ 
-2522,6 +2753,13 @@ static void free_ir_swizzle(struct hlsl_ir_swizzle *swizzle) +@@ -2522,6 +2874,13 @@ static void free_ir_swizzle(struct hlsl_ir_swizzle *swizzle) vkd3d_free(swizzle); } @@ -6836,7 +11351,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 void hlsl_free_instr(struct hlsl_ir_node *node) { assert(list_empty(&node->uses)); -@@ -2544,6 +2782,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) +@@ -2544,6 +2903,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) free_ir_if(hlsl_ir_if(node)); break; @@ -6847,7 +11362,16 @@ index 64d6e87065b..ba5bcfbfaf0 100644 case HLSL_IR_JUMP: free_ir_jump(hlsl_ir_jump(node)); break; -@@ -2600,7 +2842,7 @@ static void free_function_decl(struct hlsl_ir_function_decl *decl) +@@ -2580,7 +2943,7 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) + + for (i = 0; i < attr->args_count; ++i) + hlsl_src_remove(&attr->args[i]); +- hlsl_free_instr_list(&attr->instrs); ++ hlsl_block_cleanup(&attr->instrs); + vkd3d_free((void *)attr->name); + vkd3d_free(attr); + } +@@ -2600,7 +2963,7 @@ static void free_function_decl(struct hlsl_ir_function_decl *decl) vkd3d_free((void *)decl->attrs); vkd3d_free(decl->parameters.vars); @@ -6856,7 +11380,24 @@ index 64d6e87065b..ba5bcfbfaf0 100644 vkd3d_free(decl); } -@@ -2826,11 +3068,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) +@@ -2626,6 +2989,16 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function + struct hlsl_ir_function *func; + struct rb_entry *func_entry; + ++ if (ctx->internal_func_name) ++ { ++ char *internal_name; ++ ++ if (!(internal_name = hlsl_strdup(ctx, ctx->internal_func_name))) ++ return; ++ vkd3d_free(name); ++ name = internal_name; ++ } ++ + func_entry = rb_get(&ctx->functions, name); + if (func_entry) + { +@@ -2826,11 +3199,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) static const char *const sampler_names[] = { @@ -6874,7 +11415,7 @@ index 64d6e87065b..ba5bcfbfaf0 100644 }; static const struct -@@ 
-2844,8 +3087,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) +@@ -2844,8 +3218,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) { {"dword", HLSL_CLASS_SCALAR, HLSL_TYPE_UINT, 1, 1}, {"float", HLSL_CLASS_SCALAR, HLSL_TYPE_FLOAT, 1, 1}, @@ -6885,7 +11426,48 @@ index 64d6e87065b..ba5bcfbfaf0 100644 {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, -@@ -2993,16 +3236,16 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, +@@ -2884,8 +3258,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + + for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) + { +- unsigned int n_variants = 0; + const char *const *variants; ++ unsigned int n_variants; + + switch (bt) + { +@@ -2905,6 +3279,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + break; + + default: ++ n_variants = 0; ++ variants = NULL; + break; + } + +@@ -2956,9 +3332,11 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + } + } + +-static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, ++static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info *compile_info, + const struct hlsl_profile_info *profile, struct vkd3d_shader_message_context *message_context) + { ++ unsigned int i; ++ + memset(ctx, 0, sizeof(*ctx)); + + ctx->profile = profile; +@@ -2967,7 +3345,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, + + if (!(ctx->source_files = hlsl_alloc(ctx, sizeof(*ctx->source_files)))) + return false; +- if (!(ctx->source_files[0] = hlsl_strdup(ctx, source_name ? source_name : ""))) ++ if (!(ctx->source_files[0] = hlsl_strdup(ctx, compile_info->source_name ? 
compile_info->source_name : ""))) + { + vkd3d_free(ctx->source_files); + return false; +@@ -2993,19 +3371,32 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, rb_init(&ctx->functions, compare_function_rb); @@ -6905,8 +11487,96 @@ index 64d6e87065b..ba5bcfbfaf0 100644 return false; ctx->cur_buffer = ctx->globals_buffer; ++ for (i = 0; i < compile_info->option_count; ++i) ++ { ++ const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; ++ ++ if (option->name == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER) ++ { ++ if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) ++ ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; ++ else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) ++ ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; ++ } ++ } ++ + return true; + } + +@@ -3017,6 +3408,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) + struct hlsl_type *type, *next_type; + unsigned int i; + ++ hlsl_block_cleanup(&ctx->static_initializers); ++ + for (i = 0; i < ctx->source_files_count; ++i) + vkd3d_free((void *)ctx->source_files[i]); + vkd3d_free(ctx->source_files); +@@ -3040,6 +3433,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) + vkd3d_free((void *)buffer->name); + vkd3d_free(buffer); + } ++ ++ vkd3d_free(ctx->constant_defs.regs); + } + + int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, +@@ -3081,7 +3476,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d + return VKD3D_ERROR_INVALID_ARGUMENT; + } + +- if (!hlsl_ctx_init(&ctx, compile_info->source_name, profile, message_context)) ++ if (!hlsl_ctx_init(&ctx, compile_info, profile, message_context)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if ((ret = hlsl_lexer_compile(&ctx, hlsl)) == 2) +@@ -3135,3 +3530,44 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d + hlsl_ctx_cleanup(&ctx); + 
return ret; + } ++ ++struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl) ++{ ++ const struct hlsl_ir_function_decl *saved_cur_function = ctx->cur_function; ++ struct vkd3d_shader_code code = {.code = hlsl, .size = strlen(hlsl)}; ++ const char *saved_internal_func_name = ctx->internal_func_name; ++ struct vkd3d_string_buffer *internal_name; ++ struct hlsl_ir_function_decl *func; ++ void *saved_scanner = ctx->scanner; ++ int ret; ++ ++ TRACE("name %s, hlsl %s.\n", debugstr_a(name), debugstr_a(hlsl)); ++ ++ /* The actual name of the function is mangled with a unique prefix, both to ++ * allow defining multiple variants of a function with the same name, and to ++ * avoid polluting the user name space. */ ++ ++ if (!(internal_name = hlsl_get_string_buffer(ctx))) ++ return NULL; ++ vkd3d_string_buffer_printf(internal_name, "<%s-%u>", name, ctx->internal_name_counter++); ++ ++ /* Save and restore everything that matters. ++ * Note that saving the scope stack is hard, and shouldn't be necessary. */ ++ ++ ctx->scanner = NULL; ++ ctx->internal_func_name = internal_name->buffer; ++ ctx->cur_function = NULL; ++ ret = hlsl_lexer_compile(ctx, &code); ++ ctx->scanner = saved_scanner; ++ ctx->internal_func_name = saved_internal_func_name; ++ ctx->cur_function = saved_cur_function; ++ if (ret) ++ { ++ ERR("Failed to compile intrinsic, error %u.\n", ret); ++ hlsl_release_string_buffer(ctx, internal_name); ++ return NULL; ++ } ++ func = hlsl_get_func_decl(ctx, internal_name->buffer); ++ hlsl_release_string_buffer(ctx, internal_name); ++ return func; ++} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index ccbf22a5801..bce48e94b24 100644 +index ccbf22a5801..2cde5d58eba 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -21,10 +21,12 @@ @@ -7017,7 +11687,7 @@ index ccbf22a5801..bce48e94b24 100644 + /* Number of registers to be allocated. 
+ * Unlike the variable's type's regsize, it is not expressed in register components, but rather + * in whole registers, and may depend on which components are used within the shader. */ -+ uint32_t bind_count; ++ uint32_t allocation_size; + /* For numeric registers, a writemask can be provided to indicate the reservation of only some + * of the 4 components. */ unsigned int writemask; @@ -7031,7 +11701,24 @@ index ccbf22a5801..bce48e94b24 100644 HLSL_IR_LOAD, HLSL_IR_LOOP, HLSL_IR_JUMP, -@@ -342,12 +365,17 @@ struct hlsl_attribute +@@ -314,7 +337,7 @@ struct hlsl_src + struct hlsl_attribute + { + const char *name; +- struct list instrs; ++ struct hlsl_block instrs; + struct vkd3d_shader_location loc; + unsigned int args_count; + struct hlsl_src args[]; +@@ -333,6 +356,7 @@ struct hlsl_attribute + #define HLSL_MODIFIER_COLUMN_MAJOR 0x00000400 + #define HLSL_STORAGE_IN 0x00000800 + #define HLSL_STORAGE_OUT 0x00001000 ++#define HLSL_MODIFIER_INLINE 0x00002000 + + #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ + HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ +@@ -342,12 +366,17 @@ struct hlsl_attribute #define HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT 0 @@ -7053,7 +11740,7 @@ index ccbf22a5801..bce48e94b24 100644 }; struct hlsl_ir_var -@@ -360,8 +388,7 @@ struct hlsl_ir_var +@@ -360,8 +389,7 @@ struct hlsl_ir_var struct hlsl_buffer *buffer; /* Bitfield for storage modifiers (type modifiers are stored in data_type->modifiers). */ unsigned int storage_modifiers; @@ -7063,7 +11750,7 @@ index ccbf22a5801..bce48e94b24 100644 struct hlsl_reg_reservation reg_reservation; /* Item entry in hlsl_scope.vars. Specifically hlsl_ctx.globals.vars if the variable is global. */ -@@ -384,6 +411,13 @@ struct hlsl_ir_var +@@ -384,10 +412,21 @@ struct hlsl_ir_var * and the buffer_offset instead. 
*/ struct hlsl_reg regs[HLSL_REGSET_LAST + 1]; @@ -7073,11 +11760,19 @@ index ccbf22a5801..bce48e94b24 100644 + enum hlsl_sampler_dim sampler_dim; + struct vkd3d_shader_location first_sampler_dim_loc; + } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; ++ /* Minimum number of binds required to include all object components actually used in the shader. ++ * It may be less than the allocation size, e.g. for texture arrays. */ ++ unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1]; + uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; -@@ -446,8 +480,8 @@ struct hlsl_ir_if + uint32_t is_param : 1; ++ uint32_t is_separated_resource : 1; + }; + + /* Sized array of variables representing a function's parameters. */ +@@ -446,8 +485,8 @@ struct hlsl_ir_if { struct hlsl_ir_node node; struct hlsl_src condition; @@ -7088,7 +11783,19 @@ index ccbf22a5801..bce48e94b24 100644 }; struct hlsl_ir_loop -@@ -485,6 +519,7 @@ enum hlsl_ir_expr_op +@@ -468,7 +507,11 @@ enum hlsl_ir_expr_op + HLSL_OP1_COS, + HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ + HLSL_OP1_DSX, ++ HLSL_OP1_DSX_COARSE, ++ HLSL_OP1_DSX_FINE, + HLSL_OP1_DSY, ++ HLSL_OP1_DSY_COARSE, ++ HLSL_OP1_DSY_FINE, + HLSL_OP1_EXP2, + HLSL_OP1_FLOOR, + HLSL_OP1_FRACT, +@@ -485,6 +528,7 @@ enum hlsl_ir_expr_op HLSL_OP1_SIN, HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi] */ HLSL_OP1_SQRT, @@ -7096,7 +11803,43 @@ index ccbf22a5801..bce48e94b24 100644 HLSL_OP2_ADD, HLSL_OP2_BIT_AND, -@@ -540,6 +575,12 @@ struct hlsl_ir_swizzle +@@ -506,8 +550,15 @@ enum hlsl_ir_expr_op + HLSL_OP2_NEQUAL, + HLSL_OP2_RSHIFT, + ++ /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, ++ * then adds c. */ + HLSL_OP3_DP2ADD, +- HLSL_OP3_LERP, ++ /* MOVC(a, b, c) returns c if a is bitwise zero and b otherwise. ++ * TERNARY(a, b, c) returns c if a == 0 and b otherwise. ++ * They differ for floating point numbers, because ++ * -0.0 == 0.0, but it is not bitwise zero. 
*/ ++ HLSL_OP3_MOVC, ++ HLSL_OP3_TERNARY, + }; + + #define HLSL_MAX_OPERANDS 3 +@@ -523,7 +574,8 @@ enum hlsl_ir_jump_type + { + HLSL_IR_JUMP_BREAK, + HLSL_IR_JUMP_CONTINUE, +- HLSL_IR_JUMP_DISCARD, ++ HLSL_IR_JUMP_DISCARD_NEG, ++ HLSL_IR_JUMP_DISCARD_NZ, + HLSL_IR_JUMP_RETURN, + }; + +@@ -531,6 +583,8 @@ struct hlsl_ir_jump + { + struct hlsl_ir_node node; + enum hlsl_ir_jump_type type; ++ /* Argument used for HLSL_IR_JUMP_DISCARD_NZ and HLSL_IR_JUMP_DISCARD_NEG. */ ++ struct hlsl_src condition; + }; + + struct hlsl_ir_swizzle +@@ -540,6 +594,12 @@ struct hlsl_ir_swizzle DWORD swizzle; }; @@ -7109,7 +11852,21 @@ index ccbf22a5801..bce48e94b24 100644 /* Reference to a variable, or a part of it (e.g. a vector within a matrix within a struct). */ struct hlsl_deref { -@@ -574,7 +615,11 @@ enum hlsl_resource_load_type +@@ -559,9 +619,11 @@ struct hlsl_deref + * components, within the pertaining regset), from the start of the variable, of the part + * referenced. + * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 -- +- * before writing the bytecode. */ ++ * before writing the bytecode. ++ * Since the type information cannot longer be retrieved from the offset alone, the type is ++ * stored in the data_type field. 
*/ + struct hlsl_src offset; +- enum hlsl_regset offset_regset; ++ struct hlsl_type *data_type; + }; + + struct hlsl_ir_load +@@ -574,11 +636,17 @@ enum hlsl_resource_load_type { HLSL_RESOURCE_LOAD, HLSL_RESOURCE_SAMPLE, @@ -7121,7 +11878,13 @@ index ccbf22a5801..bce48e94b24 100644 HLSL_RESOURCE_GATHER_RED, HLSL_RESOURCE_GATHER_GREEN, HLSL_RESOURCE_GATHER_BLUE, -@@ -586,7 +631,8 @@ struct hlsl_ir_resource_load + HLSL_RESOURCE_GATHER_ALPHA, ++ HLSL_RESOURCE_SAMPLE_INFO, ++ HLSL_RESOURCE_RESINFO, + }; + + struct hlsl_ir_resource_load +@@ -586,7 +654,8 @@ struct hlsl_ir_resource_load struct hlsl_ir_node node; enum hlsl_resource_load_type load_type; struct hlsl_deref resource, sampler; @@ -7131,7 +11894,7 @@ index ccbf22a5801..bce48e94b24 100644 }; struct hlsl_ir_resource_store -@@ -607,13 +653,16 @@ struct hlsl_ir_store +@@ -607,13 +676,16 @@ struct hlsl_ir_store struct hlsl_ir_constant { struct hlsl_ir_node node; @@ -7154,7 +11917,7 @@ index ccbf22a5801..bce48e94b24 100644 /* Constant register of type 'c' where the constant value is stored for SM1. */ struct hlsl_reg reg; }; -@@ -674,6 +723,9 @@ struct hlsl_buffer +@@ -674,6 +746,9 @@ struct hlsl_buffer unsigned size, used_size; /* Register of type 'b' on which the buffer is allocated. */ struct hlsl_reg reg; @@ -7164,7 +11927,17 @@ index ccbf22a5801..bce48e94b24 100644 }; struct hlsl_ctx -@@ -744,9 +796,8 @@ struct hlsl_ctx +@@ -730,6 +805,9 @@ struct hlsl_ctx + /* Pointer to the current function; changes as the parser reads the code. */ + const struct hlsl_ir_function_decl *cur_function; + ++ /* Counter for generating unique internal variable names. */ ++ unsigned int internal_name_counter; ++ + /* Default matrix majority for matrix types. Can be set by a pragma within the HLSL source. 
*/ + unsigned int matrix_majority; + +@@ -744,15 +822,18 @@ struct hlsl_ctx struct hlsl_type *Void; } builtin_types; @@ -7176,7 +11949,31 @@ index ccbf22a5801..bce48e94b24 100644 /* Dynamic array of constant values that appear in the shader, associated to the 'c' registers. * Only used for SM1 profiles. */ -@@ -780,8 +831,9 @@ struct hlsl_resource_load_params + struct hlsl_constant_defs + { +- struct hlsl_vec4 *values; ++ struct hlsl_constant_register ++ { ++ uint32_t index; ++ struct hlsl_vec4 value; ++ } *regs; + size_t count, size; + } constant_defs; + /* Number of temp. registers required for the shader to run, i.e. the largest temp register +@@ -763,6 +844,12 @@ struct hlsl_ctx + * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ + uint32_t thread_count[3]; + ++ /* In some cases we generate opcodes by parsing an HLSL function and then ++ * invoking it. If not NULL, this field is the name of the function that we ++ * are currently parsing, "mangled" with an internal prefix to avoid ++ * polluting the user namespace. */ ++ const char *internal_func_name; ++ + /* Whether the parser is inside a state block (effects' metadata) inside a variable declaration. */ + uint32_t in_state_block : 1; + /* Whether the numthreads() attribute has been provided in the entry-point function. 
*/ +@@ -780,8 +867,9 @@ struct hlsl_resource_load_params { struct hlsl_type *format; enum hlsl_resource_load_type type; @@ -7188,7 +11985,7 @@ index ccbf22a5801..bce48e94b24 100644 }; static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) -@@ -850,6 +902,27 @@ static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node +@@ -850,6 +938,27 @@ static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node return CONTAINING_RECORD(node, struct hlsl_ir_swizzle, node); } @@ -7216,7 +12013,7 @@ index ccbf22a5801..bce48e94b24 100644 static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node) { src->node = node; -@@ -873,6 +946,15 @@ static inline void *hlsl_alloc(struct hlsl_ctx *ctx, size_t size) +@@ -873,6 +982,15 @@ static inline void *hlsl_alloc(struct hlsl_ctx *ctx, size_t size) return ptr; } @@ -7232,7 +12029,7 @@ index ccbf22a5801..bce48e94b24 100644 static inline void *hlsl_realloc(struct hlsl_ctx *ctx, void *ptr, size_t size) { void *ret = vkd3d_realloc(ptr, size); -@@ -948,6 +1030,8 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) +@@ -948,6 +1066,8 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) switch (dim) { case HLSL_SAMPLER_DIM_1D: @@ -7241,12 +12038,25 @@ index ccbf22a5801..bce48e94b24 100644 return 1; case HLSL_SAMPLER_DIM_1DARRAY: case HLSL_SAMPLER_DIM_2D: -@@ -974,11 +1058,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -965,20 +1085,25 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) + } + } + ++char *hlsl_sprintf_alloc(struct hlsl_ctx *ctx, const char *fmt, ...) 
VKD3D_PRINTF_FUNC(2, 3); ++ + const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op); + const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type); + const char *debug_hlsl_writemask(unsigned int writemask); + const char *debug_hlsl_swizzle(unsigned int swizzle, unsigned int count); + + struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type); ++struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, ++ unsigned int index); struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type); -struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, -+struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, ++struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false); void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var); @@ -7255,7 +12065,7 @@ index ccbf22a5801..bce48e94b24 100644 bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); -@@ -986,6 +1071,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl +@@ -986,6 +1111,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); @@ -7263,7 +12073,7 @@ index ccbf22a5801..bce48e94b24 100644 bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref 
*deref, const struct hlsl_deref *other); void hlsl_cleanup_deref(struct hlsl_deref *deref); -@@ -1012,64 +1098,73 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type); +@@ -1012,64 +1138,77 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type); struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size); struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2); @@ -7275,10 +12085,10 @@ index ccbf22a5801..bce48e94b24 100644 struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, const struct vkd3d_shader_location *loc); -struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, -- const struct vkd3d_shader_location *loc); --struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, +struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, const struct vkd3d_shader_location *loc); +-struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, +- const struct vkd3d_shader_location *loc); -struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node); +struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, + const struct hlsl_constant_value *value, const struct vkd3d_shader_location *loc); @@ -7300,7 +12110,7 @@ index ccbf22a5801..bce48e94b24 100644 + struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, -+ enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc); ++ enum hlsl_ir_jump_type type, struct hlsl_ir_node 
*condition, const struct vkd3d_shader_location *loc); void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); @@ -7314,6 +12124,8 @@ index ccbf22a5801..bce48e94b24 100644 + const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc); -struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); -struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, @@ -7345,6 +12157,8 @@ index ccbf22a5801..bce48e94b24 100644 struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, ++ struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope); struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, unsigned int sample_count); struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format); @@ -7360,7 +12174,16 @@ index ccbf22a5801..bce48e94b24 100644 const struct hlsl_reg_reservation *reg_reservation); void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, -@@ -1101,6 +1196,9 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type); +@@ -1092,6 +1231,8 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type); + unsigned int hlsl_type_get_array_element_reg_size(const struct 
hlsl_type *type, enum hlsl_regset regset); + struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl_type *type, + unsigned int index); ++unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, ++ enum hlsl_regset regset, unsigned int index); + bool hlsl_type_is_row_major(const struct hlsl_type *type); + unsigned int hlsl_type_minor_size(const struct hlsl_type *type); + unsigned int hlsl_type_major_size(const struct hlsl_type *type); +@@ -1101,6 +1242,9 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type); unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); @@ -7370,7 +12193,7 @@ index ccbf22a5801..bce48e94b24 100644 unsigned int hlsl_combine_swizzles(unsigned int first, unsigned int second, unsigned int dim); unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second); unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask); -@@ -1109,12 +1207,17 @@ unsigned int hlsl_swizzle_from_writemask(unsigned int writemask); +@@ -1109,12 +1253,17 @@ unsigned int hlsl_swizzle_from_writemask(unsigned int writemask); struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *start, unsigned int *count); @@ -7388,15 +12211,19 @@ index ccbf22a5801..bce48e94b24 100644 bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); -@@ -1124,7 +1227,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun +@@ -1124,9 +1273,11 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct 
hlsl_semantic *semantic, bool output, D3D_NAME *usage); bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); -+ bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); ++ bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); ++struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); ++ int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); + + #endif diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index adff1da04d8..e9ae3ccf3d3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -7438,21 +12265,76 @@ index adff1da04d8..e9ae3ccf3d3 100644 FIXME("Malformed preprocessor line directive?\n"); BEGIN(INITIAL); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index fd1eaf6ec95..0e07fe578e1 100644 +index fd1eaf6ec95..fb6d485ea69 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -85,8 +85,8 @@ struct parse_function +@@ -53,7 +53,7 @@ struct parse_initializer + { + struct hlsl_ir_node **args; + unsigned int args_count; +- struct list *instrs; ++ struct hlsl_block *instrs; + bool braces; + }; + +@@ -73,6 +73,10 @@ struct parse_variable_def + struct hlsl_semantic semantic; + struct hlsl_reg_reservation reg_reservation; + struct parse_initializer initializer; ++ ++ struct hlsl_type *basic_type; ++ unsigned int modifiers; ++ struct vkd3d_shader_location modifiers_loc; + }; + + struct parse_function +@@ -85,8 +89,8 @@ struct parse_function struct parse_if_body { - struct list *then_instrs; - struct list 
*else_instrs; -+ struct list *then_block; -+ struct list *else_block; ++ struct hlsl_block *then_block; ++ struct hlsl_block *else_block; }; enum parse_assign_op -@@ -164,7 +164,7 @@ static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct +@@ -129,9 +133,18 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "%s", s); + } + +-static struct hlsl_ir_node *node_from_list(struct list *list) ++static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) ++{ ++ return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry); ++} ++ ++static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) + { +- return LIST_ENTRY(list_tail(list), struct hlsl_ir_node, entry); ++ struct hlsl_block *block; ++ ++ if ((block = hlsl_alloc(ctx, sizeof(*block)))) ++ hlsl_block_init(block); ++ return block; + } + + static struct list *make_empty_list(struct hlsl_ctx *ctx) +@@ -143,10 +156,10 @@ static struct list *make_empty_list(struct hlsl_ctx *ctx) + return list; + } + +-static void destroy_instr_list(struct list *list) ++static void destroy_block(struct hlsl_block *block) + { +- hlsl_free_instr_list(list); +- vkd3d_free(list); ++ hlsl_block_cleanup(block); ++ vkd3d_free(block); + } + + static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct hlsl_type *src, +@@ -164,7 +177,7 @@ static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct src_comp_type = hlsl_type_get_component_type(ctx, src, k); dst_comp_type = hlsl_type_get_component_type(ctx, dst, k); @@ -7461,7 +12343,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 && !hlsl_types_are_equal(src_comp_type, dst_comp_type)) return false; } -@@ -196,9 +196,9 @@ static bool type_contains_only_numerics(struct hlsl_type *type) +@@ -196,9 +209,9 @@ static bool type_contains_only_numerics(struct hlsl_type *type) { unsigned int i; @@ -7473,7 +12355,7 @@ index 
fd1eaf6ec95..0e07fe578e1 100644 { for (i = 0; i < type->e.record.field_count; ++i) { -@@ -207,23 +207,23 @@ static bool type_contains_only_numerics(struct hlsl_type *type) +@@ -207,23 +220,23 @@ static bool type_contains_only_numerics(struct hlsl_type *type) } return true; } @@ -7502,7 +12384,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) return false; -@@ -232,10 +232,10 @@ static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ +@@ -232,10 +245,10 @@ static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) { @@ -7515,7 +12397,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { /* Scalar vars can be converted to any other numeric data type */ if (src->dimx == 1 && src->dimy == 1) -@@ -244,21 +244,21 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ +@@ -244,21 +257,21 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ if (dst->dimx == 1 && dst->dimy == 1) return true; @@ -7542,15 +12424,15 @@ index fd1eaf6ec95..0e07fe578e1 100644 return hlsl_type_component_count(src) >= hlsl_type_component_count(dst); } -@@ -273,19 +273,19 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ +@@ -273,19 +286,16 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ return hlsl_types_are_componentwise_equal(ctx, src, dst); } -static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, -+static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - unsigned int comp, const struct vkd3d_shader_location *loc); - - static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, +- unsigned int comp, const 
struct vkd3d_shader_location *loc); +- +-static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { struct hlsl_type *src_type = node->data_type; @@ -7565,7 +12447,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { unsigned int src_comp_count = hlsl_type_component_count(src_type); unsigned int dst_comp_count = hlsl_type_component_count(dst_type); -@@ -295,9 +295,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, +@@ -295,9 +305,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var; unsigned int dst_idx; @@ -7577,60 +12459,72 @@ index fd1eaf6ec95..0e07fe578e1 100644 assert(src_comp_count >= dst_comp_count || broadcast); if (matrix_cast) { -@@ -311,8 +311,8 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, +@@ -311,9 +321,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx) { + struct hlsl_ir_node *component_load; struct hlsl_type *dst_comp_type; - struct hlsl_ir_store *store; - struct hlsl_block block; +- struct hlsl_block block; ++ struct hlsl_block store_block; unsigned int src_idx; -@@ -333,19 +333,19 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, + if (broadcast) +@@ -333,21 +343,21 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); - if (!(load = add_load_component(ctx, instrs, node, src_idx, loc))) -+ if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) ++ if (!(component_load = hlsl_add_load_component(ctx, block, node, src_idx, loc))) return NULL; - if (!(cast = hlsl_new_cast(ctx, &load->node, 
dst_comp_type, loc))) + if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) return NULL; - list_add_tail(instrs, &cast->node.entry); -+ list_add_tail(instrs, &cast->entry); ++ hlsl_block_add_instr(block, cast); - if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, &cast->node))) -+ if (!hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, cast)) ++ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast)) return NULL; - list_move_tail(instrs, &block.instrs); +- list_move_tail(instrs, &block.instrs); ++ hlsl_block_add_block(block, &store_block); } - if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; - list_add_tail(instrs, &load->node.entry); +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(block, &load->node); -@@ -355,8 +355,8 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, + return &load->node; + } +@@ -355,12 +365,12 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, { if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) return NULL; - list_add_tail(instrs, &cast->node.entry); - return &cast->node; -+ list_add_tail(instrs, &cast->entry); ++ hlsl_block_add_instr(block, cast); + return cast; } } -@@ -384,19 +384,20 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct +-static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) + { + struct hlsl_type *src_type = node->data_type; +@@ -384,19 +394,20 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy) hlsl_warning(ctx, loc, 
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", - src_type->type == HLSL_CLASS_VECTOR ? "vector" : "matrix"); + src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix"); - return add_cast(ctx, instrs, node, dst_type, loc); +- return add_cast(ctx, instrs, node, dst_type, loc); ++ return add_cast(ctx, block, node, dst_type, loc); } -static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, const struct vkd3d_shader_location loc) @@ -7647,9 +12541,12 @@ index fd1eaf6ec95..0e07fe578e1 100644 "Modifier '%s' was already specified.", string->buffer); hlsl_release_string_buffer(ctx, string); return modifiers; -@@ -406,26 +407,27 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, con +@@ -404,28 +415,29 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, con + return modifiers | mod; + } - static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) +-static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) ++static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) { - struct hlsl_ir_node *condition, *not; - struct hlsl_ir_jump *jump; @@ -7658,19 +12555,22 @@ index fd1eaf6ec95..0e07fe578e1 100644 + struct hlsl_block then_block; /* E.g. "for (i = 0; ; ++i)". 
*/ - if (list_empty(cond_list)) +- if (list_empty(cond_list)) ++ if (list_empty(&cond_block->instrs)) return true; - condition = node_from_list(cond_list); +- condition = node_from_list(cond_list); - if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, condition->loc))) ++ condition = node_from_block(cond_block); + if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) return false; - list_add_tail(cond_list, ¬->entry); +- list_add_tail(cond_list, ¬->entry); ++ hlsl_block_add_instr(cond_block, not); - if (!(iff = hlsl_new_if(ctx, not, condition->loc))) + hlsl_block_init(&then_block); + -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &condition->loc))) return false; - list_add_tail(cond_list, &iff->node.entry); + hlsl_block_add_instr(&then_block, jump); @@ -7679,11 +12579,11 @@ index fd1eaf6ec95..0e07fe578e1 100644 + if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) return false; - list_add_head(&iff->then_instrs.instrs, &jump->node.entry); -+ list_add_tail(cond_list, &iff->entry); ++ hlsl_block_add_instr(cond_block, iff); return true; } -@@ -436,46 +438,87 @@ enum loop_type +@@ -436,50 +448,87 @@ enum loop_type LOOP_DO_WHILE }; @@ -7710,10 +12610,10 @@ index fd1eaf6ec95..0e07fe578e1 100644 + return false; +} + -+static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct list *init, struct list *cond, -+ struct list *iter, struct list *body, const struct vkd3d_shader_location *loc) ++static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, ++ const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, ++ struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) +{ -+ struct hlsl_block body_block; + struct hlsl_ir_node *loop; + unsigned int i; + @@ 
-7734,64 +12634,78 @@ index fd1eaf6ec95..0e07fe578e1 100644 + } + else + { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented.\n"); ++ hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); + } + } + else if (!strcmp(attr->name, "loop") + || !strcmp(attr->name, "fastopt") + || !strcmp(attr->name, "allow_uav_condition")) + { -+ hlsl_fixme(ctx, loc, "Unhandled attribute %s.", attr->name); ++ hlsl_fixme(ctx, loc, "Unhandled attribute '%s'.", attr->name); + } + else + { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unrecognized attribute %s.", attr->name); ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, "Unrecognized attribute '%s'.", attr->name); + } + } - if (!(loop = hlsl_new_loop(ctx, loc))) -+ if (!init && !(init = make_empty_list(ctx))) ++ if (!init && !(init = make_empty_block(ctx))) goto oom; - list_add_tail(list, &loop->node.entry); if (!append_conditional_break(ctx, cond)) goto oom; -+ hlsl_block_init(&body_block); -+ - if (type != LOOP_DO_WHILE) +- if (type != LOOP_DO_WHILE) - list_move_tail(&loop->body.instrs, cond); -+ list_move_tail(&body_block.instrs, cond); - +- - list_move_tail(&loop->body.instrs, body); -+ list_move_tail(&body_block.instrs, body); - +- if (iter) - list_move_tail(&loop->body.instrs, iter); -+ list_move_tail(&body_block.instrs, iter); ++ hlsl_block_add_block(body, iter); if (type == LOOP_DO_WHILE) - list_move_tail(&loop->body.instrs, cond); -+ list_move_tail(&body_block.instrs, cond); -+ -+ if (!(loop = hlsl_new_loop(ctx, &body_block, loc))) -+ goto oom; -+ list_add_tail(init, &loop->entry); ++ list_move_tail(&body->instrs, &cond->instrs); ++ else ++ list_move_head(&body->instrs, &cond->instrs); - vkd3d_free(init); - vkd3d_free(cond); - vkd3d_free(body); +- vkd3d_free(cond); +- vkd3d_free(body); - return list; ++ if (!(loop = hlsl_new_loop(ctx, body, loc))) ++ goto oom; ++ 
hlsl_block_add_instr(init, loop); ++ ++ destroy_block(cond); ++ destroy_block(body); ++ destroy_block(iter); + return init; oom: - vkd3d_free(loop); - vkd3d_free(cond_jump); - vkd3d_free(list); - destroy_instr_list(init); - destroy_instr_list(cond); - destroy_instr_list(iter); -@@ -500,14 +543,14 @@ static void free_parse_initializer(struct parse_initializer *initializer) +- destroy_instr_list(init); +- destroy_instr_list(cond); +- destroy_instr_list(iter); +- destroy_instr_list(body); ++ destroy_block(init); ++ destroy_block(cond); ++ destroy_block(iter); ++ destroy_block(body); + return NULL; + } + +@@ -496,18 +545,18 @@ static unsigned int initializer_size(const struct parse_initializer *initializer + + static void free_parse_initializer(struct parse_initializer *initializer) + { +- destroy_instr_list(initializer->instrs); ++ destroy_block(initializer->instrs); vkd3d_free(initializer->args); } @@ -7808,13 +12722,13 @@ index fd1eaf6ec95..0e07fe578e1 100644 { /* Matrix swizzle */ bool m_swizzle; -@@ -582,224 +625,102 @@ static struct hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_ +@@ -582,224 +631,102 @@ static struct hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_ return NULL; } -static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_node *return_value, struct vkd3d_shader_location loc) -+static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, ++static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) { struct hlsl_type *return_type = ctx->cur_function->return_type; @@ -7830,7 +12744,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 - if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, &loc))) - return NULL; -+ if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, loc))) ++ if (!(return_value = add_implicit_conversion(ctx, 
block, return_value, return_type, loc))) + return false; if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) @@ -7854,16 +12768,22 @@ index fd1eaf6ec95..0e07fe578e1 100644 + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); } - if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) +- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) - return NULL; - list_add_tail(instrs, &jump->node.entry); -- ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, NULL, loc))) ++ return false; ++ hlsl_block_add_instr(block, jump); + - return jump; --} -- ++ return true; + } + -static struct hlsl_ir_load *add_load_index(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) --{ ++struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) + { - const struct hlsl_deref *src; - struct hlsl_ir_load *load; - @@ -7885,28 +12805,26 @@ index fd1eaf6ec95..0e07fe578e1 100644 - - src = &store->lhs; - } -- -- if (!(load = hlsl_new_load_index(ctx, src, idx, loc))) -- return NULL; -- list_add_tail(instrs, &load->node.entry); -+ return false; -+ list_add_tail(instrs, &jump->entry); - -- return load; -+ return true; - } - --static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, -+static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - unsigned int comp, const struct vkd3d_shader_location *loc) - { -- const struct hlsl_deref *src; -- struct hlsl_ir_load *load; + struct hlsl_ir_node *load, *store; - struct hlsl_block block; ++ struct hlsl_block load_block; + struct hlsl_ir_var *var; + struct hlsl_deref src; +- if (!(load = hlsl_new_load_index(ctx, 
src, idx, loc))) ++ if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) + return NULL; +- list_add_tail(instrs, &load->node.entry); +- +- return load; +-} +- +-static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +- unsigned int comp, const struct vkd3d_shader_location *loc) +-{ +- const struct hlsl_deref *src; +- struct hlsl_ir_load *load; +- struct hlsl_block block; +- - if (var_instr->type == HLSL_IR_LOAD) - { - src = &hlsl_ir_load(var_instr)->src; @@ -7922,27 +12840,26 @@ index fd1eaf6ec95..0e07fe578e1 100644 - if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) - return NULL; - list_add_tail(instrs, &store->node.entry); -+ if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) -+ return NULL; - src = &store->lhs; - } + if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) + return NULL; -+ list_add_tail(instrs, &store->entry); ++ hlsl_block_add_instr(block, store); - if (!(load = hlsl_new_load_component(ctx, &block, src, comp, loc))) + hlsl_init_simple_deref_from_var(&src, var); -+ if (!(load = hlsl_new_load_component(ctx, &block, &src, comp, loc))) ++ if (!(load = hlsl_new_load_component(ctx, &load_block, &src, comp, loc))) return NULL; - list_move_tail(instrs, &block.instrs); +- list_move_tail(instrs, &block.instrs); ++ hlsl_block_add_block(block, &load_block); return load; } -static bool add_record_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, - unsigned int idx, const struct vkd3d_shader_location loc) -+static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, ++static bool add_record_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *record, + unsigned int idx, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; @@ -7996,7 +12913,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 - - if 
(!(value = add_load_index(ctx, instrs, &column->node, index, loc))) - return false; -+ list_add_tail(instrs, &c->entry); ++ hlsl_block_add_instr(block, c); - if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, i, &value->node))) - return false; @@ -8007,7 +12924,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 + if (!(index = hlsl_new_index(ctx, record, c, loc))) return false; - list_add_tail(instrs, &load->node.entry); -+ list_add_tail(instrs, &index->entry); ++ hlsl_block_add_instr(block, index); return true; } @@ -8044,12 +12961,12 @@ index fd1eaf6ec95..0e07fe578e1 100644 - - return &coords_load->node; -} -+static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc); -static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, -+static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, ++static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *array, struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) { const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; @@ -8072,14 +12989,19 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -814,20 +735,14 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls +@@ -810,24 +737,18 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls + return false; + } + +- if (!(index = add_implicit_conversion(ctx, instrs, index, ++ if (!(index = add_implicit_conversion(ctx, block, index, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) return false; - if (!(index = add_zero_mipmap_level(ctx, 
instrs, index, dim_count, loc))) + if (!(return_index = hlsl_new_index(ctx, array, index, loc))) return false; -+ list_add_tail(instrs, &return_index->entry); ++ hlsl_block_add_instr(block, return_index); - load_params.format = expr_type->e.resource_format; - load_params.resource = object_load->src; @@ -8096,13 +13018,13 @@ index fd1eaf6ec95..0e07fe578e1 100644 { hlsl_error(ctx, &index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Array index is not scalar."); return false; -@@ -835,23 +750,21 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls +@@ -835,23 +756,21 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls if (!(cast = hlsl_new_cast(ctx, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &index->loc))) return false; - list_add_tail(instrs, &cast->node.entry); - index = &cast->node; -+ list_add_tail(instrs, &cast->entry); ++ hlsl_block_add_instr(block, cast); + index = cast; - if (expr_type->type == HLSL_CLASS_MATRIX) @@ -8122,11 +13044,11 @@ index fd1eaf6ec95..0e07fe578e1 100644 - if (!add_load_index(ctx, instrs, array, index, loc)) + if (!(return_index = hlsl_new_index(ctx, array, index, loc))) return false; -+ list_add_tail(instrs, &return_index->entry); ++ hlsl_block_add_instr(block, return_index); return true; } -@@ -877,12 +790,12 @@ static struct hlsl_type *apply_type_modifiers(struct hlsl_ctx *ctx, struct hlsl_ +@@ -877,12 +796,12 @@ static struct hlsl_type *apply_type_modifiers(struct hlsl_ctx *ctx, struct hlsl_ if (!(*modifiers & HLSL_MODIFIERS_MAJORITY_MASK) && !(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK) @@ -8141,7 +13063,23 @@ index fd1eaf6ec95..0e07fe578e1 100644 { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "'row_major' and 'column_major' modifiers are only allowed for matrices."); -@@ -923,7 +836,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, +@@ -917,13 +836,23 @@ static bool shader_is_sm_5_1(const struct hlsl_ctx 
*ctx) + return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; + } + ++static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) ++{ ++ return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); ++} ++ ++static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) ++{ ++ return !shader_profile_version_ge(ctx, major, minor); ++} ++ + static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + struct hlsl_type *type, unsigned int modifiers, struct list *defs) + { struct parse_variable_def *v, *v_next; size_t i = 0; @@ -8150,7 +13088,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); memset(fields, 0, sizeof(*fields)); -@@ -939,7 +852,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, +@@ -939,7 +868,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, field->type = type; @@ -8159,7 +13097,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { for (k = 0; k < v->arrays.count; ++k) unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -@@ -983,6 +896,9 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, +@@ -983,6 +912,9 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Illegal initializer on a struct field."); free_parse_initializer(&v->initializer); } @@ -8169,7 +13107,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 vkd3d_free(v); } vkd3d_free(defs); -@@ -1052,18 +968,23 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, +@@ -1052,18 +984,23 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, } static bool add_func_parameter(struct hlsl_ctx 
*ctx, struct hlsl_func_parameters *parameters, @@ -8197,7 +13135,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 return false; var->is_param = 1; -@@ -1084,12 +1005,61 @@ static struct hlsl_reg_reservation parse_reg_reservation(const char *reg_string) +@@ -1084,12 +1021,61 @@ static struct hlsl_reg_reservation parse_reg_reservation(const char *reg_string) { struct hlsl_reg_reservation reservation = {0}; @@ -8218,7 +13156,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 + struct hlsl_reg_reservation reservation = {0}; + char *endptr; + -+ if (ctx->profile->major_version < 4) ++ if (shader_profile_version_lt(ctx, 4, 0)) + return reservation; + + reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); @@ -8261,8 +13199,27 @@ index fd1eaf6ec95..0e07fe578e1 100644 return reservation; } -@@ -1122,53 +1092,37 @@ static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) - return list; +@@ -1109,66 +1095,82 @@ static struct hlsl_ir_function_decl *get_func_decl(struct rb_tree *funcs, + return NULL; + } + +-static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) ++static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr) + { +- struct list *list; ++ struct hlsl_block *block; + +- if (!(list = make_empty_list(ctx))) ++ if (!(block = make_empty_block(ctx))) + { +- hlsl_free_instr(node); ++ hlsl_free_instr(instr); + return NULL; + } +- list_add_tail(list, &node->entry); +- return list; ++ hlsl_block_add_instr(block, instr); ++ return block; } -static unsigned int evaluate_static_expression(struct hlsl_ir_node *node) @@ -8270,20 +13227,19 @@ index fd1eaf6ec95..0e07fe578e1 100644 + const struct vkd3d_shader_location *loc) { - if (node->data_type->type != HLSL_CLASS_SCALAR) +- return 0; + struct hlsl_ir_constant *constant; + struct hlsl_ir_node *node; ++ struct hlsl_block expr; + unsigned int ret = 0; + bool progress; -+ -+ if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), -+ 
hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) - return 0; - switch (node->type) -+ do ++ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) { - case HLSL_IR_CONSTANT: -- { ++ switch (node->type) + { - struct hlsl_ir_constant *constant = hlsl_ir_constant(node); - const union hlsl_constant_value *value = &constant->value[0]; - @@ -8303,17 +13259,33 @@ index fd1eaf6ec95..0e07fe578e1 100644 - default: - vkd3d_unreachable(); - } -- } -- ++ case HLSL_IR_CONSTANT: ++ case HLSL_IR_EXPR: ++ case HLSL_IR_SWIZZLE: ++ case HLSL_IR_LOAD: ++ case HLSL_IR_INDEX: ++ continue; ++ case HLSL_IR_CALL: ++ case HLSL_IR_IF: ++ case HLSL_IR_LOOP: ++ case HLSL_IR_JUMP: ++ case HLSL_IR_RESOURCE_LOAD: ++ case HLSL_IR_RESOURCE_STORE: ++ case HLSL_IR_STORE: ++ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Expected literal expression."); + } ++ } + - case HLSL_IR_EXPR: - case HLSL_IR_LOAD: - case HLSL_IR_RESOURCE_LOAD: - case HLSL_IR_SWIZZLE: - FIXME("Unhandled type %s.\n", hlsl_node_type_to_string(node->type)); - return 0; -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, block); -+ } while (progress); ++ if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) ++ return 0; ++ hlsl_block_add_block(&expr, block); - case HLSL_IR_CALL: - case HLSL_IR_IF: @@ -8322,7 +13294,20 @@ index fd1eaf6ec95..0e07fe578e1 100644 - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_STORE: - vkd3d_unreachable(); -+ node = node_from_list(&block->instrs); ++ if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) ++ { ++ hlsl_block_cleanup(&expr); ++ return 0; ++ } ++ ++ do ++ { ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); ++ progress |= hlsl_copy_propagation_execute(ctx, &expr); ++ } while (progress); ++ ++ node = node_from_block(&expr); + if (node->type == HLSL_IR_CONSTANT) + { + constant = 
hlsl_ir_constant(node); @@ -8331,15 +13316,17 @@ index fd1eaf6ec95..0e07fe578e1 100644 + else + { + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Failed to evaluate constant expression %d.", node->type); ++ "Failed to evaluate constant expression."); } - vkd3d_unreachable(); ++ hlsl_block_cleanup(&expr); ++ + return ret; } static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) -@@ -1180,20 +1134,20 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t +@@ -1180,20 +1182,20 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) return true; @@ -8365,7 +13352,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 } /* Both matrices */ -@@ -1226,7 +1180,7 @@ static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hl +@@ -1226,7 +1228,7 @@ static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hl static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct hlsl_type *t2, const struct vkd3d_shader_location *loc, enum hlsl_type_class *type, unsigned int *dimx, unsigned int *dimy) { @@ -8374,7 +13361,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -1237,7 +1191,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct +@@ -1237,7 +1239,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct return false; } @@ -8383,7 +13370,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -1264,17 +1218,17 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct +@@ -1264,17 +1266,17 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct if (t1->dimx == 1 && t1->dimy == 1) { @@ -8404,7 +13391,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { *type = HLSL_CLASS_MATRIX; *dimx 
= min(t1->dimx, t2->dimx); -@@ -1284,13 +1238,13 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct +@@ -1284,13 +1286,13 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct { if (t1->dimx * t1->dimy <= t2->dimx * t2->dimy) { @@ -8420,7 +13407,15 @@ index fd1eaf6ec95..0e07fe578e1 100644 *dimx = t2->dimx; *dimy = t2->dimy; } -@@ -1306,55 +1260,50 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, +@@ -1299,67 +1301,62 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct + return true; + } + +-static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], + struct hlsl_type *type, const struct vkd3d_shader_location *loc) + { struct hlsl_ir_node *expr; unsigned int i; @@ -8449,7 +13444,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 - struct hlsl_ir_store *store; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; -+ struct hlsl_block block; ++ struct hlsl_block store_block; unsigned int j; - if (!(c = hlsl_new_uint_constant(ctx, i, loc))) @@ -8463,7 +13458,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 - struct hlsl_ir_load *load; - - if (!(load = add_load_index(ctx, instrs, operands[j], &c->node, loc))) -+ if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, block, operands[j], i, loc))) return NULL; - vector_operands[j] = &load->node; + @@ -8472,28 +13467,61 @@ index fd1eaf6ec95..0e07fe578e1 100644 } - if (!(value = add_expr(ctx, instrs, op, vector_operands, vector_type, loc))) -+ if (!(value = add_expr(ctx, instrs, op, cell_operands, scalar_type, loc))) ++ if (!(value = add_expr(ctx, block, op, cell_operands, scalar_type, loc))) return NULL; - if (!(store = 
hlsl_new_store_index(ctx, &var_deref, &c->node, value, 0, loc))) -+ if (!hlsl_new_store_component(ctx, &block, &var_deref, i, value)) ++ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, value)) return NULL; - list_add_tail(instrs, &store->node.entry); -+ list_move_tail(instrs, &block.instrs); ++ hlsl_block_add_block(block, &store_block); } - if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return NULL; - list_add_tail(instrs, &load->node.entry); -+ list_add_tail(instrs, &var_load->node.entry); ++ hlsl_block_add_instr(block, &var_load->node); - return &load->node; + return &var_load->node; } if (!(expr = hlsl_new_expr(ctx, op, operands, type, loc))) -@@ -1407,7 +1356,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct + return NULL; +- list_add_tail(instrs, &expr->entry); ++ hlsl_block_add_instr(block, expr); + + return expr; + } +@@ -1385,95 +1382,79 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * + } + } + +-static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg}; + +- return add_expr(ctx, instrs, op, args, arg->data_type, loc); ++ return add_expr(ctx, block, op, args, arg->data_type, loc); + } + +-static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) + { + check_integer_type(ctx, arg); + +- return add_unary_arithmetic_expr(ctx, instrs, op, arg, loc); ++ return add_unary_arithmetic_expr(ctx, block, op, 
arg, loc); + } + +-static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) + { struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; struct hlsl_type *bool_type; @@ -8501,9 +13529,12 @@ index fd1eaf6ec95..0e07fe578e1 100644 + bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, arg->data_type->dimx, arg->data_type->dimy); - if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) -@@ -1416,20 +1365,27 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct - return add_expr(ctx, instrs, op, args, bool_type, loc); +- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) ++ if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) + return NULL; + +- return add_expr(ctx, instrs, op, args, bool_type, loc); ++ return add_expr(ctx, block, op, args, bool_type, loc); } -static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, @@ -8524,51 +13555,167 @@ index fd1eaf6ec95..0e07fe578e1 100644 - common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); +} -+ -+static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, + +- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) +- return NULL; ++static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *common_type; -+ + +- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, 
common_type, loc))) +- return NULL; + common_type = get_common_numeric_type(ctx, arg1, arg2, loc); - if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) - return NULL; -@@ -1441,13 +1397,13 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str - } +- return add_expr(ctx, instrs, op, args, common_type, loc); +-} ++ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) ++ return NULL; - static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, +-static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, struct vkd3d_shader_location loc) -+ enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); +-{ +- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); ++ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) ++ return NULL; - list_move_tail(list1, list2); - vkd3d_free(list2); +- list_move_tail(list1, list2); +- vkd3d_free(list2); - add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, &loc); -+ add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, loc); - return list1; +- return list1; ++ return add_expr(ctx, block, op, args, common_type, loc); } -@@ -1499,13 +1455,13 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str - } - - static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location loc) -+ enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +-static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block 
*block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); + check_integer_type(ctx, arg1); + check_integer_type(ctx, arg2); - list_move_tail(list1, list2); - vkd3d_free(list2); -- add_binary_comparison_expr(ctx, list1, op, arg1, arg2, &loc); -+ add_binary_comparison_expr(ctx, list1, op, arg1, arg2, loc); - return list1; +- return add_binary_arithmetic_expr(ctx, instrs, op, arg1, arg2, loc); +-} +- +-static struct list *add_binary_bitwise_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, +- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); +- +- list_move_tail(list1, list2); +- vkd3d_free(list2); +- add_binary_bitwise_expr(ctx, list1, op, arg1, arg2, loc); +- +- return list1; ++ return add_binary_arithmetic_expr(ctx, block, op, arg1, arg2, loc); } -@@ -1596,7 +1552,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis +-static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) + { +@@ -1489,27 +1470,16 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str + common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); + +- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) ++ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) + return NULL; + +- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) 
++ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) + return NULL; + +- return add_expr(ctx, instrs, op, args, return_type, loc); +-} +- +-static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, +- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location loc) +-{ +- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); +- +- list_move_tail(list1, list2); +- vkd3d_free(list2); +- add_binary_comparison_expr(ctx, list1, op, arg1, arg2, &loc); +- return list1; ++ return add_expr(ctx, block, op, args, return_type, loc); + } + +-static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) + { +@@ -1523,28 +1493,16 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct + + common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); + +- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) ++ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) + return NULL; + +- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) ++ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) + return NULL; + +- return add_expr(ctx, instrs, op, args, common_type, loc); +-} +- +-static struct list *add_binary_logical_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, +- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); +- +- list_move_tail(list1, list2); +- vkd3d_free(list2); +- add_binary_logical_expr(ctx, list1, op, arg1, arg2, loc); +- +- return list1; 
++ return add_expr(ctx, block, op, args, common_type, loc); + } + +-static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) + { +@@ -1566,28 +1524,16 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l + return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy); + +- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, return_type, loc))) ++ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, return_type, loc))) + return NULL; + +- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, integer_type, loc))) ++ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, integer_type, loc))) + return NULL; + +- return add_expr(ctx, instrs, op, args, return_type, loc); +-} +- +-static struct list *add_binary_shift_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, +- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); +- +- list_move_tail(list1, list2); +- vkd3d_free(list2); +- add_binary_shift_expr(ctx, list1, op, arg1, arg2, loc); +- +- return list1; ++ return add_expr(ctx, block, op, args, return_type, loc); + } + +-static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct list *instrs, ++static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) + { + enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); +@@ -1596,31 +1542,29 @@ static struct hlsl_ir_node 
*add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis enum hlsl_ir_expr_op op; unsigned dim; @@ -8577,7 +13724,11 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -1607,7 +1563,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis + if ((string = hlsl_type_to_string(ctx, arg1->data_type))) +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Invalid type %s.\n", string->buffer); ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); + hlsl_release_string_buffer(ctx, string); return NULL; } @@ -8586,7 +13737,11 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -1618,9 +1574,9 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis + if ((string = hlsl_type_to_string(ctx, arg2->data_type))) +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Invalid type %s.\n", string->buffer); ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); + hlsl_release_string_buffer(ctx, string); return NULL; } @@ -8598,7 +13753,107 @@ index fd1eaf6ec95..0e07fe578e1 100644 dim = arg1->data_type->dimx; else dim = min(arg1->data_type->dimx, arg2->data_type->dimx); -@@ -1702,7 +1658,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1642,28 +1586,75 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis + return add_expr(ctx, instrs, op, args, ret_type, loc); + } + +-static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) ++static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, ++ struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) + { +- static const enum hlsl_ir_expr_op ops[] = +- { +- 0, +- HLSL_OP2_ADD, +- 0, +- HLSL_OP2_MUL, +- HLSL_OP2_DIV, +- HLSL_OP2_MOD, +- HLSL_OP2_LSHIFT, +- 
HLSL_OP2_RSHIFT, +- HLSL_OP2_BIT_AND, +- HLSL_OP2_BIT_OR, +- HLSL_OP2_BIT_XOR, +- }; ++ struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); + +- return ops[op]; +-} ++ hlsl_block_add_block(block1, block2); ++ destroy_block(block2); + +-static bool invert_swizzle(unsigned int *swizzle, unsigned int *writemask, unsigned int *ret_width) +-{ ++ switch (op) ++ { ++ case HLSL_OP2_ADD: ++ case HLSL_OP2_DIV: ++ case HLSL_OP2_MOD: ++ case HLSL_OP2_MUL: ++ add_binary_arithmetic_expr(ctx, block1, op, arg1, arg2, loc); ++ break; ++ ++ case HLSL_OP2_BIT_AND: ++ case HLSL_OP2_BIT_OR: ++ case HLSL_OP2_BIT_XOR: ++ add_binary_bitwise_expr(ctx, block1, op, arg1, arg2, loc); ++ break; ++ ++ case HLSL_OP2_LESS: ++ case HLSL_OP2_GEQUAL: ++ case HLSL_OP2_EQUAL: ++ case HLSL_OP2_NEQUAL: ++ add_binary_comparison_expr(ctx, block1, op, arg1, arg2, loc); ++ break; ++ ++ case HLSL_OP2_LOGIC_AND: ++ case HLSL_OP2_LOGIC_OR: ++ add_binary_logical_expr(ctx, block1, op, arg1, arg2, loc); ++ break; ++ ++ case HLSL_OP2_LSHIFT: ++ case HLSL_OP2_RSHIFT: ++ add_binary_shift_expr(ctx, block1, op, arg1, arg2, loc); ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ return block1; ++} ++ ++static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) ++{ ++ static const enum hlsl_ir_expr_op ops[] = ++ { ++ 0, ++ HLSL_OP2_ADD, ++ 0, ++ HLSL_OP2_MUL, ++ HLSL_OP2_DIV, ++ HLSL_OP2_MOD, ++ HLSL_OP2_LSHIFT, ++ HLSL_OP2_RSHIFT, ++ HLSL_OP2_BIT_AND, ++ HLSL_OP2_BIT_OR, ++ HLSL_OP2_BIT_XOR, ++ }; ++ ++ return ops[op]; ++} ++ ++static bool invert_swizzle(unsigned int *swizzle, unsigned int *writemask, unsigned int *ret_width) ++{ + unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; + + /* Apply the writemask to the swizzle to get a new writemask and swizzle. 
*/ +@@ -1698,16 +1689,16 @@ static bool invert_swizzle(unsigned int *swizzle, unsigned int *writemask, unsig + return true; + } + +-static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *lhs, ++static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) { struct hlsl_type *lhs_type = lhs->data_type; @@ -8607,7 +13862,18 @@ index fd1eaf6ec95..0e07fe578e1 100644 unsigned int writemask = 0; if (assign_op == ASSIGN_OP_SUB) -@@ -1720,13 +1676,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + { +- if (!(rhs = add_unary_arithmetic_expr(ctx, instrs, HLSL_OP1_NEG, rhs, &rhs->loc))) ++ if (!(rhs = add_unary_arithmetic_expr(ctx, block, HLSL_OP1_NEG, rhs, &rhs->loc))) + return NULL; + assign_op = ASSIGN_OP_ADD; + } +@@ -1716,17 +1707,17 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + enum hlsl_ir_expr_op op = op_from_assignment(assign_op); + + assert(op); +- if (!(rhs = add_binary_arithmetic_expr(ctx, instrs, op, lhs, rhs, &rhs->loc))) ++ if (!(rhs = add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, &rhs->loc))) return NULL; } @@ -8615,7 +13881,8 @@ index fd1eaf6ec95..0e07fe578e1 100644 + if (lhs_type->class <= HLSL_CLASS_LAST_NUMERIC) writemask = (1 << lhs_type->dimx) - 1; - if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) +- if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) ++ if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) return NULL; - while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_RESOURCE_LOAD) @@ -8623,7 +13890,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) { -@@ -1735,10 +1691,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in 
+@@ -1735,10 +1726,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in } else if (lhs->type == HLSL_IR_SWIZZLE) { @@ -8637,12 +13904,12 @@ index fd1eaf6ec95..0e07fe578e1 100644 hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); if (!invert_swizzle(&s, &writemask, &width)) -@@ -1751,10 +1708,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1751,10 +1743,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in { return NULL; } - list_add_tail(instrs, &new_swizzle->node.entry); -+ list_add_tail(instrs, &new_swizzle->entry); ++ hlsl_block_add_instr(block, new_swizzle); lhs = swizzle->val.node; - rhs = &new_swizzle->node; @@ -8650,7 +13917,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 } else { -@@ -1763,18 +1720,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1763,18 +1755,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in } } @@ -8678,7 +13945,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 assert(resource_type->base_type == HLSL_TYPE_TEXTURE || resource_type->base_type == HLSL_TYPE_UAV); if (resource_type->base_type != HLSL_TYPE_UAV) -@@ -1787,25 +1745,70 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1787,25 +1780,70 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Resource store expressions must write to all components."); @@ -8700,7 +13967,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 return NULL; - list_add_tail(instrs, &store->node.entry); + } -+ list_add_tail(instrs, &store->entry); ++ hlsl_block_add_instr(block, store); + hlsl_cleanup_deref(&resource_deref); + } + else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) @@ -8719,13 +13986,13 @@ index fd1eaf6ec95..0e07fe578e1 100644 + + if (!(c = 
hlsl_new_uint_constant(ctx, i, &lhs->loc))) + return NULL; -+ list_add_tail(instrs, &c->entry); ++ hlsl_block_add_instr(block, c); + + if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) + return NULL; -+ list_add_tail(instrs, &cell->entry); ++ hlsl_block_add_instr(block, cell); + -+ if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) ++ if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) + return NULL; + + if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) @@ -8736,7 +14003,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 + hlsl_cleanup_deref(&deref); + return NULL; + } -+ list_add_tail(instrs, &store->entry); ++ hlsl_block_add_instr(block, store); + hlsl_cleanup_deref(&deref); + } } @@ -8745,38 +14012,40 @@ index fd1eaf6ec95..0e07fe578e1 100644 - struct hlsl_ir_store *store; + struct hlsl_ir_node *store; + struct hlsl_deref deref; ++ ++ if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) ++ return NULL; - if (!(store = hlsl_new_store_index(ctx, &hlsl_ir_load(lhs)->src, NULL, rhs, writemask, &rhs->loc))) -+ if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) - return NULL; -- list_add_tail(instrs, &store->node.entry); -+ + if (!(store = hlsl_new_store_index(ctx, &deref, NULL, rhs, writemask, &rhs->loc))) + { + hlsl_cleanup_deref(&deref); -+ return NULL; + return NULL; +- list_add_tail(instrs, &store->node.entry); + } -+ list_add_tail(instrs, &store->entry); ++ hlsl_block_add_instr(block, store); + hlsl_cleanup_deref(&deref); } /* Don't use the instruction itself as a source, as this makes structure -@@ -1813,37 +1816,37 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in +@@ -1813,44 +1851,44 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in * the last instruction in the list, we do need to copy. 
*/ if (!(copy = hlsl_new_copy(ctx, rhs))) return NULL; - list_add_tail(instrs, ©->node.entry); - return ©->node; -+ list_add_tail(instrs, ©->entry); ++ hlsl_block_add_instr(block, copy); + return copy; } - static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, +-static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, - struct vkd3d_shader_location loc) ++static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool decrement, bool post, + const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *lhs = node_from_list(instrs); +- struct hlsl_ir_node *lhs = node_from_list(instrs); - struct hlsl_ir_constant *one; ++ struct hlsl_ir_node *lhs = node_from_block(block); + struct hlsl_ir_node *one; if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) @@ -8788,10 +14057,10 @@ index fd1eaf6ec95..0e07fe578e1 100644 + if (!(one = hlsl_new_int_constant(ctx, 1, loc))) return false; - list_add_tail(instrs, &one->node.entry); -+ list_add_tail(instrs, &one->entry); ++ hlsl_block_add_instr(block, one); - if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, &one->node)) -+ if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) ++ if (!add_assignment(ctx, block, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) return false; if (post) @@ -8802,7 +14071,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 if (!(copy = hlsl_new_copy(ctx, lhs))) return false; - list_add_tail(instrs, ©->node.entry); -+ list_add_tail(instrs, ©->entry); ++ hlsl_block_add_instr(block, copy); /* Post increment/decrement expressions are considered const. 
*/ - if (!(copy->node.data_type = hlsl_type_clone(ctx, copy->node.data_type, 0, HLSL_MODIFIER_CONST))) @@ -8810,7 +14079,15 @@ index fd1eaf6ec95..0e07fe578e1 100644 return false; } -@@ -1861,10 +1864,8 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, + return true; + } + +-static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, ++static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) + { + unsigned int src_comp_count = hlsl_type_component_count(src->data_type); +@@ -1861,23 +1899,21 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, for (k = 0; k < src_comp_count; ++k) { @@ -8821,8 +14098,9 @@ index fd1eaf6ec95..0e07fe578e1 100644 - struct hlsl_ir_node *conv; struct hlsl_block block; - if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) -@@ -1872,10 +1873,10 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, +- if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) ++ if (!(load = hlsl_add_load_component(ctx, instrs, src, k, &src->loc))) + return; dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); @@ -8833,9 +14111,12 @@ index fd1eaf6ec95..0e07fe578e1 100644 - if (!(store = hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv))) + if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) return; - list_move_tail(instrs, &block.instrs); +- list_move_tail(instrs, &block.instrs); ++ hlsl_block_add_block(instrs, &block); -@@ -1885,12 +1886,12 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, + ++*store_index; + } +@@ -1885,12 +1921,12 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_struct) { @@ -8851,7 
+14132,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { unsigned int i; -@@ -1905,12 +1906,12 @@ static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_s +@@ -1905,12 +1941,12 @@ static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_s static bool type_has_numeric_components(struct hlsl_type *type) { @@ -8867,48 +14148,400 @@ index fd1eaf6ec95..0e07fe578e1 100644 { unsigned int i; -@@ -1934,7 +1935,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +@@ -1923,204 +1959,231 @@ static bool type_has_numeric_components(struct hlsl_type *type) + return false; + } + +-static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, +- unsigned int modifiers, const struct vkd3d_shader_location *modifiers_loc, struct list *var_list) ++static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int modifiers, ++ const struct vkd3d_shader_location *loc) + { +- struct parse_variable_def *v, *v_next; ++ modifiers &= (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); ++ if (modifiers) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_modifiers_to_string(ctx, modifiers))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ } ++} ++ ++static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) ++{ ++ struct hlsl_type *basic_type = v->basic_type; + struct hlsl_ir_function_decl *func; +- unsigned int invalid_modifiers; +- struct list *statements_list; ++ struct hlsl_semantic new_semantic; ++ uint32_t modifiers = v->modifiers; ++ bool unbounded_res_array = false; + struct hlsl_ir_var *var; struct hlsl_type *type; bool local = true; ++ char *var_name; ++ unsigned int i; - if (basic_type->type == HLSL_CLASS_MATRIX) +- assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); ++ assert(basic_type); + +- if 
(!(statements_list = make_empty_list(ctx))) +- { +- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) +- free_parse_variable_def(v); +- vkd3d_free(var_list); +- return NULL; +- } + if (basic_type->class == HLSL_CLASS_MATRIX) - assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); ++ assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (!(statements_list = make_empty_list(ctx))) -@@ -1966,7 +1967,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +- if (!var_list) +- return statements_list; ++ type = basic_type; - type = basic_type; +- invalid_modifiers = modifiers & (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); +- if (invalid_modifiers) ++ if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) + { +- struct vkd3d_string_buffer *string; +- +- if ((string = hlsl_modifiers_to_string(ctx, invalid_modifiers))) +- hlsl_error(ctx, modifiers_loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, +- "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); +- hlsl_release_string_buffer(ctx, string); ++ for (i = 0; i < v->arrays.count; ++i) ++ unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); + } +- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) ++ if (unbounded_res_array) + { +- bool unbounded_res_array = false; +- unsigned int i; +- +- type = basic_type; +- - if (shader_is_sm_5_1(ctx) && type->type == HLSL_CLASS_OBJECT) -+ if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) ++ if (v->arrays.count == 1) { - for (i = 0; i < v->arrays.count; ++i) - unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -@@ -2035,7 +2036,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +- for (i = 0; i < v->arrays.count; ++i) +- unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); ++ hlsl_fixme(ctx, &v->loc, "Unbounded 
resource arrays."); ++ return; } - vkd3d_free(v->arrays.sizes); +- +- if (unbounded_res_array) ++ else + { +- if (v->arrays.count == 1) +- { +- hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); +- free_parse_variable_def(v); +- continue; +- } +- else +- { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Unbounded resource arrays cannot be multi-dimensional."); +- } ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Unbounded resource arrays cannot be multi-dimensional."); + } +- else ++ } ++ else ++ { ++ for (i = 0; i < v->arrays.count; ++i) + { +- for (i = 0; i < v->arrays.count; ++i) ++ if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) + { +- if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) +- { +- unsigned int size = initializer_size(&v->initializer); +- unsigned int elem_components = hlsl_type_component_count(type); +- +- if (i < v->arrays.count - 1) +- { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Only innermost array size can be implicit."); +- free_parse_initializer(&v->initializer); +- v->initializer.args_count = 0; +- } +- else if (elem_components == 0) +- { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Cannot declare an implicit size array of a size 0 type."); +- free_parse_initializer(&v->initializer); +- v->initializer.args_count = 0; +- } +- else if (size == 0) +- { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Implicit size arrays need to be initialized."); +- free_parse_initializer(&v->initializer); +- v->initializer.args_count = 0; ++ unsigned int size = initializer_size(&v->initializer); ++ unsigned int elem_components = hlsl_type_component_count(type); + +- } +- else if (size % elem_components != 0) +- { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Cannot initialize implicit size array with %u components, expected a multiple of %u.", +- size, elem_components); +- 
free_parse_initializer(&v->initializer); +- v->initializer.args_count = 0; +- } +- else +- { +- v->arrays.sizes[i] = size / elem_components; +- } ++ if (i < v->arrays.count - 1) ++ { ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Only innermost array size can be implicit."); ++ v->initializer.args_count = 0; ++ } ++ else if (elem_components == 0) ++ { ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Cannot declare an implicit size array of a size 0 type."); ++ v->initializer.args_count = 0; ++ } ++ else if (size == 0) ++ { ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Implicit size arrays need to be initialized."); ++ v->initializer.args_count = 0; ++ } ++ else if (size % elem_components != 0) ++ { ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Cannot initialize implicit size array with %u components, expected a multiple of %u.", ++ size, elem_components); ++ v->initializer.args_count = 0; ++ } ++ else ++ { ++ v->arrays.sizes[i] = size / elem_components; + } +- type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); + } ++ type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); + } +- vkd3d_free(v->arrays.sizes); ++ } - if (!(var = hlsl_new_var(ctx, v->name, type, v->loc, &v->semantic, modifiers, &v->reg_reservation))) -+ if (!(var = hlsl_new_var(ctx, v->name, type, &v->loc, &v->semantic, modifiers, &v->reg_reservation))) - { - free_parse_variable_def(v); - continue; -@@ -2043,6 +2044,13 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - - var->buffer = ctx->cur_buffer; - -+ if (var->buffer == ctx->globals_buffer) -+ { -+ if (var->reg_reservation.offset_type) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "packoffset() is only allowed inside constant buffer declarations."); -+ } ++ if (!(var_name = vkd3d_strdup(v->name))) ++ return; + - if (ctx->cur_scope == ctx->globals) ++ new_semantic = 
v->semantic; ++ if (v->semantic.name) ++ { ++ if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) { - local = false; -@@ -2148,7 +2156,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +- free_parse_variable_def(v); +- continue; ++ vkd3d_free(var_name); ++ return; + } ++ } + +- var->buffer = ctx->cur_buffer; ++ if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) ++ { ++ hlsl_cleanup_semantic(&new_semantic); ++ vkd3d_free(var_name); ++ return; ++ } + +- if (ctx->cur_scope == ctx->globals) +- { +- local = false; ++ var->buffer = ctx->cur_buffer; + +- if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, +- "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); ++ if (var->buffer == ctx->globals_buffer) ++ { ++ if (var->reg_reservation.offset_type) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "packoffset() is only allowed inside constant buffer declarations."); ++ } + +- /* Mark it as uniform. We need to do this here since synthetic +- * variables also get put in the global scope, but shouldn't be +- * considered uniforms, and we have no way of telling otherwise. 
*/ +- if (!(modifiers & HLSL_STORAGE_STATIC)) +- var->storage_modifiers |= HLSL_STORAGE_UNIFORM; ++ if (ctx->cur_scope == ctx->globals) ++ { ++ local = false; + +- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && +- type_has_object_components(var->data_type, true)) +- { +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Target profile doesn't support objects as struct members in uniform variables.\n"); +- } ++ if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); + +- if ((func = hlsl_get_func_decl(ctx, var->name))) +- { +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, +- "'%s' is already defined as a function.", var->name); +- hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, +- "'%s' was previously defined here.", var->name); +- } +- } +- else +- { +- static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED +- | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; ++ /* Mark it as uniform. We need to do this here since synthetic ++ * variables also get put in the global scope, but shouldn't be ++ * considered uniforms, and we have no way of telling otherwise. 
*/ ++ if (!(modifiers & HLSL_STORAGE_STATIC)) ++ var->storage_modifiers |= HLSL_STORAGE_UNIFORM; + +- if (modifiers & invalid) +- { +- struct vkd3d_string_buffer *string; ++ if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && ++ type_has_object_components(var->data_type, true)) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Target profile doesn't support objects as struct members in uniform variables."); ++ } + +- if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, +- "Modifiers '%s' are not allowed on local variables.", string->buffer); +- hlsl_release_string_buffer(ctx, string); +- } +- if (var->semantic.name) +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, +- "Semantics are not allowed on local variables."); ++ if ((func = hlsl_get_func_decl(ctx, var->name))) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, ++ "'%s' is already defined as a function.", var->name); ++ hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, ++ "'%s' was previously defined here.", var->name); + } ++ } ++ else ++ { ++ static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED ++ | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; + +- if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) +- && type_has_object_components(var->data_type, false)) ++ if (modifiers & invalid) + { +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Static variables cannot have both numeric and resource components."); ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Modifiers '%s' are not allowed on local variables.", string->buffer); ++ hlsl_release_string_buffer(ctx, string); + } ++ if 
(var->semantic.name) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, ++ "Semantics are not allowed on local variables."); + +- if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count +- && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) ++ if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count && !(modifiers & HLSL_STORAGE_STATIC)) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, +- "Const variable \"%s\" is missing an initializer.", var->name); +- hlsl_free_var(var); +- free_parse_initializer(&v->initializer); +- vkd3d_free(v); +- continue; ++ "Const variable \"%s\" is missing an initializer.", var->name); + } ++ } ++ ++ if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) ++ && type_has_object_components(var->data_type, false)) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Static variables cannot have both numeric and resource components."); ++ } ++ ++ if (!hlsl_add_var(ctx, var, local)) ++ { ++ struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); ++ ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, ++ "Variable \"%s\" was already declared in this scope.", var->name); ++ hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); ++ hlsl_free_var(var); ++ return; ++ } ++} + +- if (!hlsl_add_var(ctx, var, local)) ++static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) ++{ ++ struct parse_variable_def *v, *v_next; ++ struct hlsl_block *initializers; ++ struct hlsl_ir_var *var; ++ struct hlsl_type *type; ++ ++ if (!(initializers = make_empty_block(ctx))) ++ { ++ LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) + { +- struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); ++ free_parse_variable_def(v); ++ } ++ vkd3d_free(var_list); ++ return NULL; 
++ } + +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, +- "Variable \"%s\" was already declared in this scope.", var->name); +- hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); +- hlsl_free_var(var); +- free_parse_initializer(&v->initializer); +- vkd3d_free(v); ++ LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) ++ { ++ /* If this fails, the variable failed to be declared. */ ++ if (!(var = hlsl_get_var(ctx->cur_scope, v->name))) ++ { ++ free_parse_variable_def(v); + continue; + } ++ type = var->data_type; + + if (v->initializer.args_count) + { +@@ -2135,8 +2198,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected %u components in initializer, but got %u.", + hlsl_type_component_count(type), size); +- free_parse_initializer(&v->initializer); +- vkd3d_free(v); ++ free_parse_variable_def(v); + continue; + } + +@@ -2148,58 +2210,55 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } else { @@ -8916,17 +14549,20 @@ index fd1eaf6ec95..0e07fe578e1 100644 + struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); assert(v->initializer.args_count == 1); - list_add_tail(v->initializer.instrs, &load->node.entry); -@@ -2156,7 +2164,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +- list_add_tail(v->initializer.instrs, &load->node.entry); ++ hlsl_block_add_instr(v->initializer.instrs, &load->node); + add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); } - if (modifiers & HLSL_STORAGE_STATIC) +- if (modifiers & HLSL_STORAGE_STATIC) - list_move_tail(&ctx->static_initializers, v->initializer.instrs); -+ list_move_tail(&ctx->static_initializers.instrs, v->initializer.instrs); ++ if (var->storage_modifiers & HLSL_STORAGE_STATIC) ++ 
hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); else - list_move_tail(statements_list, v->initializer.instrs); - vkd3d_free(v->initializer.args); -@@ -2164,9 +2172,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t +- list_move_tail(statements_list, v->initializer.instrs); +- vkd3d_free(v->initializer.args); +- vkd3d_free(v->initializer.instrs); ++ hlsl_block_add_block(initializers, v->initializer.instrs); } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { @@ -8937,37 +14573,188 @@ index fd1eaf6ec95..0e07fe578e1 100644 /* Initialize statics to zero by default. */ -@@ -2181,9 +2187,9 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - vkd3d_free(v); + if (type_has_object_components(var->data_type, false)) + { +- vkd3d_free(v); ++ free_parse_variable_def(v); + continue; + } + + if (!(zero = hlsl_new_uint_constant(ctx, 0, &var->loc))) + { +- vkd3d_free(v); ++ free_parse_variable_def(v); continue; } - list_add_tail(&ctx->static_initializers, &zero->node.entry); + hlsl_block_add_instr(&ctx->static_initializers, zero); - if (!(cast = add_cast(ctx, &ctx->static_initializers, &zero->node, var->data_type, &var->loc))) -+ if (!(cast = add_cast(ctx, &ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) ++ if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) { - vkd3d_free(v); +- vkd3d_free(v); ++ free_parse_variable_def(v); continue; -@@ -2194,7 +2200,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - vkd3d_free(v); + } + + if (!(store = hlsl_new_simple_store(ctx, var, cast))) + { +- vkd3d_free(v); ++ free_parse_variable_def(v); continue; } - list_add_tail(&ctx->static_initializers, &store->node.entry); + hlsl_block_add_instr(&ctx->static_initializers, store); } - vkd3d_free(v); +- vkd3d_free(v); ++ free_parse_variable_def(v); } -@@ -2279,7 +2285,7 @@ static struct hlsl_ir_node 
*intrinsic_float_convert_arg(struct hlsl_ctx *ctx, - if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) - return arg; - -- type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); - return add_implicit_conversion(ctx, params->instrs, arg, type, loc); ++ + vkd3d_free(var_list); +- return statements_list; ++ return initializers; } -@@ -2315,12 +2321,12 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * + struct find_function_call_args +@@ -2271,34 +2330,120 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, + return args.decl; + } + +-static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, +- const struct parse_initializer *params, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) ++static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) + { +- struct hlsl_type *type = arg->data_type; +- +- if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) +- return arg; ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + +- type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); +- return add_implicit_conversion(ctx, params->instrs, arg, type, loc); ++ return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); + } + +-static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, +- struct hlsl_type *type, const struct vkd3d_shader_location *loc) ++static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, ++ const struct parse_initializer *args, const struct vkd3d_shader_location *loc) + { ++ struct hlsl_ir_node *call; + unsigned int i; + +- for (i = 0; i < params->args_count; ++i) ++ assert(args->args_count == func->parameters.count); ++ ++ for 
(i = 0; i < func->parameters.count; ++i) + { +- struct hlsl_ir_node *new_arg; ++ struct hlsl_ir_var *param = func->parameters.vars[i]; ++ struct hlsl_ir_node *arg = args->args[i]; + +- if (!(new_arg = add_implicit_conversion(ctx, params->instrs, params->args[i], type, loc))) +- return false; +- params->args[i] = new_arg; ++ if (!hlsl_types_are_equal(arg->data_type, param->data_type)) ++ { ++ struct hlsl_ir_node *cast; ++ ++ if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) ++ return false; ++ args->args[i] = cast; ++ arg = cast; ++ } ++ ++ if (param->storage_modifiers & HLSL_STORAGE_IN) ++ { ++ struct hlsl_ir_node *store; ++ ++ if (!(store = hlsl_new_simple_store(ctx, param, arg))) ++ return false; ++ hlsl_block_add_instr(args->instrs, store); ++ } + } + +- return true; +-} ++ if (!(call = hlsl_new_call(ctx, func, loc))) ++ return false; ++ hlsl_block_add_instr(args->instrs, call); ++ ++ for (i = 0; i < func->parameters.count; ++i) ++ { ++ struct hlsl_ir_var *param = func->parameters.vars[i]; ++ struct hlsl_ir_node *arg = args->args[i]; ++ ++ if (param->storage_modifiers & HLSL_STORAGE_OUT) ++ { ++ struct hlsl_ir_load *load; ++ ++ if (arg->data_type->modifiers & HLSL_MODIFIER_CONST) ++ hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, ++ "Output argument to \"%s\" is const.", func->func->name); ++ ++ if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) ++ return false; ++ hlsl_block_add_instr(args->instrs, &load->node); ++ ++ if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) ++ return false; ++ } ++ } ++ ++ if (func->return_var) ++ { ++ struct hlsl_ir_load *load; ++ ++ if (!(load = hlsl_new_var_load(ctx, func->return_var, loc))) ++ return false; ++ hlsl_block_add_instr(args->instrs, &load->node); ++ } ++ else ++ { ++ struct hlsl_ir_node *expr; ++ ++ if (!(expr = hlsl_new_void_expr(ctx, loc))) ++ return false; ++ hlsl_block_add_instr(args->instrs, expr); ++ } ++ ++ return true; ++} ++ ++static 
struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_type *type = arg->data_type; ++ ++ if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) ++ return arg; ++ ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); ++ return add_implicit_conversion(ctx, params->instrs, arg, type, loc); ++} ++ ++static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, ++ struct hlsl_type *type, const struct vkd3d_shader_location *loc) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < params->args_count; ++i) ++ { ++ struct hlsl_ir_node *new_arg; ++ ++ if (!(new_arg = add_implicit_conversion(ctx, params->instrs, params->args[i], type, loc))) ++ return false; ++ params->args[i] = new_arg; ++ } ++ ++ return true; ++} + + static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +@@ -2315,12 +2460,12 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * base = expr_common_base_type(base, arg_type->base_type); @@ -8982,7 +14769,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { matrices = true; dimx = min(dimx, arg_type->dimx); -@@ -2369,7 +2375,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, +@@ -2369,7 +2514,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; @@ -8991,7 +14778,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 return convert_args(ctx, params, type, loc); } -@@ -2383,20 +2389,18 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx, +@@ -2383,32 +2528,78 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx, static bool intrinsic_all(struct hlsl_ctx *ctx, const struct 
parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -9004,94 +14791,66 @@ index fd1eaf6ec95..0e07fe578e1 100644 if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) return false; - list_add_tail(params->instrs, &one->node.entry); -+ list_add_tail(params->instrs, &one->entry); ++ hlsl_block_add_instr(params->instrs, one); if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->node.entry); -+ list_add_tail(params->instrs, &zero->entry); ++ hlsl_block_add_instr(params->instrs, zero); - mul = &one->node; + mul = one; count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) -@@ -2404,52 +2408,123 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, - if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + { +- if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) ++ return false; ++ ++ if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) ++ return false; ++ } ++ ++ return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); ++} ++ ++static bool intrinsic_any(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; ++ unsigned int i, count; ++ ++ if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) ++ { ++ hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); ++ return false; ++ } ++ ++ if (arg->data_type->base_type == HLSL_TYPE_FLOAT) ++ { ++ if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) ++ return false; ++ hlsl_block_add_instr(params->instrs, zero); ++ ++ if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) return false; - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, 
HLSL_OP2_MUL, &load->node, mul, loc))) -+ if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) - return false; - } - -- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, &zero->node, loc); --} -- --/* Find the type corresponding to the given source type, with the same -- * dimensions but a different base type. */ --static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, -- const struct hlsl_type *type, enum hlsl_base_type base_type) --{ -- return hlsl_get_numeric_type(ctx, type->type, base_type, type->dimx, type->dimy); -+ return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); - } - --static bool intrinsic_asuint(struct hlsl_ctx *ctx, -+static bool intrinsic_any(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -- struct hlsl_type *data_type; -+ struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; -+ unsigned int i, count; - -- if (params->args_count != 1 && params->args_count != 3) -+ if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) - { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Wrong number of arguments to function 'asuint': expected 1 or 3, but got %u.", params->args_count); -+ hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); - return false; - } - -- if (params->args_count == 3) -+ if (arg->data_type->base_type == HLSL_TYPE_FLOAT) - { -- hlsl_fixme(ctx, loc, "Double-to-integer conversion."); -- return false; -- } -+ if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) -+ return false; -+ list_add_tail(params->instrs, &zero->entry); - -- data_type = params->args[0]->data_type; -- if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) -- { -- struct vkd3d_string_buffer 
*string; -+ if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) -+ return false; - -- if ((string = hlsl_type_to_string(ctx, data_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Wrong type for argument 0 of asuint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", -- string->buffer); -- hlsl_release_string_buffer(ctx, string); + return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); - } -- data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); ++ } + else if (arg->data_type->base_type == HLSL_TYPE_BOOL) + { + if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) -+ return false; -+ list_add_tail(params->instrs, &bfalse->entry); + return false; ++ hlsl_block_add_instr(params->instrs, bfalse); + + or = bfalse; + + count = hlsl_type_component_count(arg->data_type); + for (i = 0; i < count; ++i) + { -+ if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) + return false; + + if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) @@ -9099,17 +14858,19 @@ index fd1eaf6ec95..0e07fe578e1 100644 + } + + return true; -+ } -+ + } + +- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, &zero->node, loc); + hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); + return false; -+} -+ -+/* Find the type corresponding to the given source type, with the same -+ * dimensions but a different base type. 
*/ -+static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, -+ const struct hlsl_type *type, enum hlsl_base_type base_type) -+{ + } + + /* Find the type corresponding to the given source type, with the same +@@ -2416,7 +2607,30 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, + static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, + const struct hlsl_type *type, enum hlsl_base_type base_type) + { +- return hlsl_get_numeric_type(ctx, type->type, base_type, type->dimx, type->dimy); + return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); +} + @@ -9134,43 +14895,45 @@ index fd1eaf6ec95..0e07fe578e1 100644 + + operands[0] = params->args[0]; + return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); -+} -+ -+static bool intrinsic_asuint(struct hlsl_ctx *ctx, + } + + static bool intrinsic_asuint(struct hlsl_ctx *ctx, +@@ -2469,6 +2683,34 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, max, params->args[2], loc); + } + ++static bool intrinsic_clip(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_type *data_type; ++ struct hlsl_ir_node *condition, *jump; + -+ if (params->args_count != 1 && params->args_count != 3) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to function 'asuint': expected 1 or 3, but got %u.", params->args_count); ++ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; -+ } + -+ if (params->args_count == 3) -+ { -+ hlsl_fixme(ctx, loc, "Double-to-integer conversion."); -+ return false; -+ } ++ condition = params->args[0]; + -+ data_type = params->args[0]->data_type; -+ if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) ++ if 
(ctx->profile->major_version < 4 && hlsl_type_component_count(condition->data_type) > 4) + { + struct vkd3d_string_buffer *string; + -+ if ((string = hlsl_type_to_string(ctx, data_type))) ++ if ((string = hlsl_type_to_string(ctx, condition->data_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Wrong type for argument 0 of asuint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", -+ string->buffer); ++ "Argument type cannot exceed 4 components, got type \"%s\".", string->buffer); + hlsl_release_string_buffer(ctx, string); ++ return false; + } -+ data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); - - operands[0] = params->args[0]; - return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); -@@ -2483,7 +2558,7 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, ++ ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NEG, condition, loc))) ++ return false; ++ hlsl_block_add_instr(params->instrs, jump); ++ ++ return true; ++} ++ + static bool intrinsic_cos(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -2483,7 +2725,7 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, static bool intrinsic_cross(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -9179,17 +14942,17 @@ index fd1eaf6ec95..0e07fe578e1 100644 struct hlsl_ir_node *arg1 = params->args[0], *arg2 = params->args[1]; struct hlsl_ir_node *arg1_cast, *arg2_cast, *mul1_neg, *mul1, *mul2; struct hlsl_type *cast_type; -@@ -2504,35 +2579,55 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, +@@ -2504,35 +2746,99 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl1->node.entry); -+ list_add_tail(params->instrs, &arg1_swzl1->entry); ++ hlsl_block_add_instr(params->instrs, 
arg1_swzl1); if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl1->node.entry); -+ list_add_tail(params->instrs, &arg2_swzl1->entry); ++ hlsl_block_add_instr(params->instrs, arg2_swzl1); - if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, - &arg1_swzl1->node, &arg2_swzl1->node, loc))) @@ -9199,17 +14962,18 @@ index fd1eaf6ec95..0e07fe578e1 100644 - if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, *loc))) + if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, loc))) return false; - list_add_tail(params->instrs, &mul1_neg->entry); +- list_add_tail(params->instrs, &mul1_neg->entry); ++ hlsl_block_add_instr(params->instrs, mul1_neg); if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl2->node.entry); -+ list_add_tail(params->instrs, &arg1_swzl2->entry); ++ hlsl_block_add_instr(params->instrs, arg1_swzl2); if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl2->node.entry); -+ list_add_tail(params->instrs, &arg2_swzl2->entry); ++ hlsl_block_add_instr(params->instrs, arg2_swzl2); - if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, - &arg1_swzl2->node, &arg2_swzl2->node, loc))) @@ -9230,6 +14994,28 @@ index fd1eaf6ec95..0e07fe578e1 100644 + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); +} + ++static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); ++} ++ ++static bool 
intrinsic_ddx_fine(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_FINE, arg, loc); ++} ++ +static bool intrinsic_ddy(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ @@ -9240,11 +15026,33 @@ index fd1eaf6ec95..0e07fe578e1 100644 + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); +} ++ ++static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); ++} ++ ++static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); ++} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -2565,8 +2660,7 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, +@@ -2565,8 +2871,7 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, static bool intrinsic_exp(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -9254,19 +15062,19 @@ index fd1eaf6ec95..0e07fe578e1 100644 if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) return false; -@@ -2574,9 +2668,9 @@ static bool 
intrinsic_exp(struct hlsl_ctx *ctx, +@@ -2574,9 +2879,9 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, /* 1/ln(2) */ if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) return false; - list_add_tail(params->instrs, &coeff->node.entry); -+ list_add_tail(params->instrs, &coeff->entry); ++ hlsl_block_add_instr(params->instrs, coeff); - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &coeff->node, params->args[0], loc))) + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) return false; return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, mul, loc); -@@ -2604,6 +2698,43 @@ static bool intrinsic_floor(struct hlsl_ctx *ctx, +@@ -2604,6 +2909,47 @@ static bool intrinsic_floor(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FLOOR, arg, loc); } @@ -9274,6 +15082,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *x, *y, *div, *abs, *frac, *neg_frac, *ge, *select, *zero; ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; + static const struct hlsl_constant_value zero_value; + + if (!(x = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) @@ -9287,7 +15096,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 + + if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) + return false; -+ list_add_tail(params->instrs, &zero->entry); ++ hlsl_block_add_instr(params->instrs, zero); + + if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) + return false; @@ -9301,7 +15110,10 @@ index fd1eaf6ec95..0e07fe578e1 100644 + if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, div, zero, loc))) + return false; + -+ if (!(select = hlsl_add_conditional(ctx, params->instrs, ge, frac, neg_frac))) ++ operands[0] = ge; ++ operands[1] = frac; ++ operands[2] = neg_frac; ++ if (!(select = add_expr(ctx, 
params->instrs, HLSL_OP3_TERNARY, operands, x->data_type, loc))) + return false; + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, select, y, loc); @@ -9310,7 +15122,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 static bool intrinsic_frac(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -2635,7 +2766,7 @@ static bool intrinsic_length(struct hlsl_ctx *ctx, +@@ -2635,7 +2981,7 @@ static bool intrinsic_length(struct hlsl_ctx *ctx, struct hlsl_type *type = params->args[0]->data_type; struct hlsl_ir_node *arg, *dot; @@ -9319,7 +15131,16 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -2692,20 +2823,18 @@ static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, +@@ -2675,7 +3021,7 @@ static bool intrinsic_lerp(struct hlsl_ctx *ctx, + } + + static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, +- struct list *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, ++ struct hlsl_block *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *log, *mul; +@@ -2692,91 +3038,77 @@ static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, static bool intrinsic_lit(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -9328,149 +15149,133 @@ index fd1eaf6ec95..0e07fe578e1 100644 - struct hlsl_ir_node *n_l, *n_h, *m; - struct hlsl_ir_node *diffuse; - struct hlsl_ir_store *store; -+ struct hlsl_ir_node *n_l_neg, *n_h_neg, *specular_or, *specular_pow, *load; -+ struct hlsl_ir_node *n_l, *n_h, *m, *diffuse, *zero, *store, *init; -+ struct hlsl_constant_value init_value; -+ struct hlsl_ir_load *var_load; - struct hlsl_deref var_deref; - struct hlsl_type *ret_type; +- struct hlsl_deref var_deref; +- struct hlsl_type *ret_type; - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; - struct hlsl_block block; +- 
struct hlsl_ir_var *var; +- struct hlsl_block block; ++ struct hlsl_ir_function_decl *func; - if (params->args[0]->data_type->type != HLSL_CLASS_SCALAR - || params->args[1]->data_type->type != HLSL_CLASS_SCALAR - || params->args[2]->data_type->type != HLSL_CLASS_SCALAR) ++ static const char body[] = ++ "float4 lit(float n_l, float n_h, float m)\n" ++ "{\n" ++ " float4 ret;\n" ++ " ret.xw = 1.0;\n" ++ " ret.y = max(n_l, 0);\n" ++ " ret.z = (n_l < 0 || n_h < 0) ? 0 : pow(n_h, m);\n" ++ " return ret;\n" ++ "}"; ++ + if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR + || params->args[1]->data_type->class != HLSL_CLASS_SCALAR + || params->args[2]->data_type->class != HLSL_CLASS_SCALAR) { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); return false; -@@ -2726,37 +2855,35 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, + } + +- if (!(n_l = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; +- +- if (!(n_h = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) ++ if (!(func = hlsl_compile_internal_function(ctx, "lit", body))) return false; - hlsl_init_simple_deref_from_var(&var_deref, var); + +- if (!(m = intrinsic_float_convert_arg(ctx, params, params->args[2], loc))) +- return false; +- +- ret_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); +- +- if (!(var = hlsl_new_synthetic_var(ctx, "lit", ret_type, loc))) +- return false; +- hlsl_init_simple_deref_from_var(&var_deref, var); ++ return add_user_call(ctx, func, params, loc); ++} - if (!(init = hlsl_new_constant(ctx, ret_type, loc))) -+ init_value.u[0].f = 1.0f; -+ init_value.u[1].f = 0.0f; -+ init_value.u[2].f = 0.0f; -+ init_value.u[3].f = 1.0f; -+ if (!(init = hlsl_new_constant(ctx, ret_type, &init_value, loc))) - return false; +- return false; - init->value[0].f = 1.0f; - init->value[1].f = 0.0f; - init->value[2].f = 0.0f; - init->value[3].f = 1.0f; - list_add_tail(params->instrs, &init->node.entry); -+ 
list_add_tail(params->instrs, &init->entry); - -- if (!(store = hlsl_new_simple_store(ctx, var, &init->node))) -+ if (!(store = hlsl_new_simple_store(ctx, var, init))) - return false; -- list_add_tail(params->instrs, &store->node.entry); -+ list_add_tail(params->instrs, &store->entry); - - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) - return false; -- list_add_tail(params->instrs, &zero->node.entry); -+ list_add_tail(params->instrs, &zero->entry); - - /* Diffuse component. */ -- if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, &zero->node, loc))) -+ if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) - return false; - -- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse))) -+ if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) - return false; - list_move_tail(params->instrs, &block.instrs); - - /* Specular component. */ -- if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, -- n_h, &zero->node, loc))) -+ if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_h, zero, loc))) - return false; - -- if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, -- n_l, &zero->node, loc))) -+ if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_l, zero, loc))) - return false; - - if (!(specular_or = add_binary_logical_expr(ctx, params->instrs, HLSL_OP2_LOGIC_OR, n_l_neg, n_h_neg, loc))) -@@ -2765,20 +2892,67 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, - if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) - return false; - -- if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, &zero->node, specular_pow))) -+ if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, zero, specular_pow))) - return false; - -- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 2, &load->node))) -+ if 
(!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) - return false; - list_move_tail(params->instrs, &block.instrs); - -- if (!(load = hlsl_new_var_load(ctx, var, *loc))) -+ if (!(var_load = hlsl_new_var_load(ctx, var, loc))) - return false; -- list_add_tail(params->instrs, &load->node.entry); -+ list_add_tail(params->instrs, &var_load->node.entry); - - return true; - } - +static bool intrinsic_log(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *log, *arg, *coeff; -+ + +- if (!(store = hlsl_new_simple_store(ctx, var, &init->node))) + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ + return false; +- list_add_tail(params->instrs, &store->node.entry); + +- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) + if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) -+ return false; -+ + return false; +- list_add_tail(params->instrs, &zero->node.entry); + +- /* Diffuse component. */ +- if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, &zero->node, loc))) + /* ln(2) */ + if (!(coeff = hlsl_new_float_constant(ctx, 0.69314718055f, loc))) -+ return false; -+ + return false; + +- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse))) +- return false; +- list_move_tail(params->instrs, &block.instrs); + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); +} -+ + +- /* Specular component. 
*/ +- if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, +- n_h, &zero->node, loc))) +- return false; +static bool intrinsic_log10(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *log, *arg, *coeff; -+ + +- if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, +- n_l, &zero->node, loc))) + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ + return false; + +- if (!(specular_or = add_binary_logical_expr(ctx, params->instrs, HLSL_OP2_LOGIC_OR, n_l_neg, n_h_neg, loc))) + if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) -+ return false; -+ + return false; + +- if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) + /* 1 / log2(10) */ + if (!(coeff = hlsl_new_float_constant(ctx, 0.301029996f, loc))) -+ return false; -+ + return false; + +- if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, &zero->node, specular_pow))) +- return false; + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); +} -+ + +- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 2, &load->node))) +- return false; +- list_move_tail(params->instrs, &block.instrs); +static bool intrinsic_log2(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; -+ + +- if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ + return false; +- list_add_tail(params->instrs, &load->node.entry); + +- return true; + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); -+} -+ + } + static bool intrinsic_max(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -2808,15 
+2982,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -2808,15 +3140,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, struct hlsl_ir_load *load; struct hlsl_ir_var *var; @@ -9489,7 +15294,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { vect_count++; cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->dimx); -@@ -2854,13 +3028,11 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -2854,21 +3186,21 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, for (j = 0; j < matrix_type->dimy; ++j) { struct hlsl_ir_node *instr = NULL; @@ -9502,10 +15307,14 @@ index fd1eaf6ec95..0e07fe578e1 100644 - struct hlsl_ir_node *mul; + struct hlsl_ir_node *value1, *value2, *mul; - if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) +- if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) ++ if (!(value1 = hlsl_add_load_component(ctx, params->instrs, ++ cast1, j * cast1->data_type->dimx + k, loc))) return false; -@@ -2868,7 +3040,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) + +- if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) ++ if (!(value2 = hlsl_add_load_component(ctx, params->instrs, ++ cast2, k * cast2->data_type->dimx + i, loc))) return false; - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &value1->node, &value2->node, loc))) @@ -9513,23 +15322,27 @@ index fd1eaf6ec95..0e07fe578e1 100644 return false; if (instr) -@@ -2882,13 +3054,13 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, +@@ -2882,15 +3214,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, } } - if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) return false; - 
list_move_tail(params->instrs, &block.instrs); +- list_move_tail(params->instrs, &block.instrs); ++ hlsl_block_add_block(params->instrs, &block); } } - if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); +- list_add_tail(params->instrs, &load->node.entry); ++ hlsl_block_add_instr(params->instrs, &load->node); -@@ -2901,7 +3073,7 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, + return !!add_implicit_conversion(ctx, params->instrs, &load->node, ret_type, loc); + } +@@ -2901,7 +3233,7 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, struct hlsl_type *type = params->args[0]->data_type; struct hlsl_ir_node *dot, *rsq, *arg; @@ -9538,93 +15351,128 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -2986,6 +3158,42 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx, +@@ -2986,74 +3318,80 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, arg, loc); } +-static bool intrinsic_sin(struct hlsl_ctx *ctx, +static bool intrinsic_sign(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *arg; +- +- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) +- return false; +- +- return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); +-} + struct hlsl_ir_node *lt, *neg, *op1, *op2, *zero, *arg = params->args[0]; + static const struct hlsl_constant_value zero_value; -+ + +-/* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ +-static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, +- const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_node *min_arg, *max_arg, 
*x_arg, *p, *p_num, *p_denom, *res; +- struct hlsl_ir_constant *one, *minus_two, *three; + struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, + arg->data_type->dimx, arg->data_type->dimy); -+ + +- if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) -+ return false; -+ list_add_tail(params->instrs, &zero->entry); -+ + return false; ++ hlsl_block_add_instr(params->instrs, zero); + +- min_arg = params->args[0]; +- max_arg = params->args[1]; +- x_arg = params->args[2]; + /* Check if 0 < arg, cast bool to int */ -+ + +- if (!(min_arg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, min_arg, loc))) + if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, zero, arg, loc))) -+ return false; -+ + return false; + +- if (!(p_num = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, x_arg, min_arg, loc))) + if (!(op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) -+ return false; -+ + return false; + +- if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, max_arg, min_arg, loc))) +- return false; + /* Check if arg < 0, cast bool to int and invert (meaning true is -1) */ -+ + +- if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) + if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, arg, zero, loc))) -+ return false; -+ + return false; +- list_add_tail(params->instrs, &one->node.entry); + +- if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, &one->node, p_denom, loc))) + if (!(op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) -+ return false; -+ + return false; + +- if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p_num, p_denom, loc))) + if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, op2, loc))) -+ return 
false; -+ + return false; + +- if (!(p = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, p, loc))) +- return false; + /* Adding these two together will make 1 when > 0, -1 when < 0, and 0 when neither */ + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, neg, op1, loc); +} -+ - static bool intrinsic_sin(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3001,8 +3209,7 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, - static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res; -- struct hlsl_ir_constant *one, *minus_two, *three; -+ struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res, *one, *minus_two, *three; - if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) - return false; -@@ -3022,9 +3229,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, - - if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) - return false; -- list_add_tail(params->instrs, &one->node.entry); -+ list_add_tail(params->instrs, &one->entry); - -- if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, &one->node, p_denom, loc))) -+ if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) - return false; - - if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p_num, p_denom, loc))) -@@ -3035,16 +3242,16 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, - - if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) - return false; +- if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) +- return false; - list_add_tail(params->instrs, &minus_two->node.entry); -+ list_add_tail(params->instrs, &minus_two->entry); ++static bool intrinsic_sin(struct hlsl_ctx *ctx, ++ const struct parse_initializer 
*params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; - if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) +- if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) return false; - list_add_tail(params->instrs, &three->node.entry); -+ list_add_tail(params->instrs, &three->entry); - if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &minus_two->node, p, loc))) -+ if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) - return false; +- return false; ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); ++} - if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, &three->node, res, loc))) -+ if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, three, res, loc))) +- return false; ++/* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ ++static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_function_decl *func; ++ struct hlsl_type *type; ++ char *body; ++ ++ static const char template[] = ++ "%s smoothstep(%s low, %s high, %s x)\n" ++ "{\n" ++ " %s p = saturate((x - low) / (high - low));\n" ++ " return (p * p) * (3 - 2 * p);\n" ++ "}"; + +- if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, p, loc))) ++ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + +- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, res, loc))) ++ if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name, type->name))) ++ return false; ++ func = hlsl_compile_internal_function(ctx, "smoothstep", body); ++ 
vkd3d_free(body); ++ if (!func) return false; - if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, p, loc))) -@@ -3081,7 +3288,7 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, +- return true; ++ return add_user_call(ctx, func, params, loc); + } + + static bool intrinsic_sqrt(struct hlsl_ctx *ctx, +@@ -3081,7 +3419,7 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, return false; type = ge->data_type; @@ -9633,7 +15481,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); } -@@ -3090,9 +3297,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -3090,9 +3428,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * { struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; const struct hlsl_type *sampler_type; @@ -9644,7 +15492,12 @@ index fd1eaf6ec95..0e07fe578e1 100644 if (params->args_count != 2 && params->args_count != 4) { -@@ -3107,7 +3312,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -3103,11 +3439,11 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + + if (params->args_count == 4) + { +- hlsl_fixme(ctx, loc, "Samples with gradients are not implemented.\n"); ++ hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); } sampler_type = params->args[0]->data_type; @@ -9653,7 +15506,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) { struct vkd3d_string_buffer *string; -@@ -3118,24 +3323,19 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -3118,27 +3454,63 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * name, ctx->builtin_types.sampler[dim]->name, string->buffer); hlsl_release_string_buffer(ctx, string); } @@ -9667,7 +15520,43 @@ index 
fd1eaf6ec95..0e07fe578e1 100644 if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) - coords = params->args[1]; +- coords = params->args[1]; ++ { ++ return false; ++ } ++ ++ /* tex1D() functions never produce 1D resource declarations. For newer profiles half offset ++ is used for the second coordinate, while older ones appear to replicate first coordinate.*/ ++ if (dim == HLSL_SAMPLER_DIM_1D) ++ { ++ struct hlsl_ir_load *load; ++ struct hlsl_ir_node *half; ++ struct hlsl_ir_var *var; ++ unsigned int idx = 0; ++ ++ if (!(var = hlsl_new_synthetic_var(ctx, "coords", hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 2), loc))) ++ return false; ++ ++ initialize_var_components(ctx, params->instrs, var, &idx, coords); ++ if (shader_profile_version_ge(ctx, 4, 0)) ++ { ++ if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) ++ return false; ++ hlsl_block_add_instr(params->instrs, half); ++ ++ initialize_var_components(ctx, params->instrs, var, &idx, half); ++ } ++ else ++ initialize_var_components(ctx, params->instrs, var, &idx, coords); ++ ++ if (!(load = hlsl_new_var_load(ctx, var, loc))) ++ return false; ++ hlsl_block_add_instr(params->instrs, &load->node); ++ ++ coords = &load->node; ++ ++ dim = HLSL_SAMPLER_DIM_2D; ++ } load_params.coords = coords; + load_params.resource = params->args[0]; @@ -9677,11 +15566,31 @@ index fd1eaf6ec95..0e07fe578e1 100644 if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); -+ list_add_tail(params->instrs, &load->entry); ++ hlsl_block_add_instr(params->instrs, load); return true; } -@@ -3156,13 +3356,14 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, ++static bool intrinsic_tex1D(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "tex1D", HLSL_SAMPLER_DIM_1D); 
++} ++ + static bool intrinsic_tex2D(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3151,32 +3523,39 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, + return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); + } + ++static bool intrinsic_texCUBE(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); ++} ++ + static bool intrinsic_transpose(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *arg = params->args[0]; struct hlsl_type *arg_type = arg->data_type; @@ -9698,37 +15607,45 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -3174,7 +3375,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, + if ((string = hlsl_type_to_string(ctx, arg_type))) + hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.\n", ++ "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.", + string->buffer); + hlsl_release_string_buffer(ctx, string); return false; } - if (arg_type->type == HLSL_CLASS_SCALAR) + if (arg_type->class == HLSL_CLASS_SCALAR) { - list_add_tail(params->instrs, &arg->entry); +- list_add_tail(params->instrs, &arg->entry); ++ hlsl_block_add_instr(params->instrs, arg); return true; -@@ -3190,21 +3391,75 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, + } + +@@ -3190,21 +3569,75 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { for (j = 0; j < arg_type->dimy; ++j) { - struct hlsl_ir_store *store; struct hlsl_block block; - if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) +- if (!(load = add_load_component(ctx, params->instrs, arg, j * 
arg->data_type->dimx + i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) return false; - if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, &load->node))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) return false; - list_move_tail(params->instrs, &block.instrs); - } - } - -- if (!(load = hlsl_new_var_load(ctx, var, *loc))) +- list_move_tail(params->instrs, &block.instrs); ++ hlsl_block_add_block(params->instrs, &block); ++ } ++ } ++ + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) - return false; -- list_add_tail(params->instrs, &load->node.entry); -+ list_add_tail(params->instrs, &var_load->node.entry); ++ return false; ++ hlsl_block_add_instr(params->instrs, &var_load->node); + + return true; +} @@ -9758,7 +15675,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Wrong argument type '%s'.", string->buffer); + hlsl_release_string_buffer(ctx, string); -+ } + } + + return false; + } @@ -9768,26 +15685,28 @@ index fd1eaf6ec95..0e07fe578e1 100644 + + if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) + return false; -+ list_add_tail(params->instrs, &c->entry); ++ hlsl_block_add_instr(params->instrs, c); + + if (arg_type->class == HLSL_CLASS_VECTOR) + { + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) + return false; -+ list_add_tail(params->instrs, &swizzle->entry); ++ hlsl_block_add_instr(params->instrs, swizzle); + + arg = swizzle; -+ } -+ + } + +- if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) -+ return false; + return false; +- list_add_tail(params->instrs, &load->node.entry); + -+ if (ctx->profile->major_version >= 4) ++ if (shader_profile_version_ge(ctx, 4, 0)) + return !!add_unary_arithmetic_expr(ctx, 
params->instrs, HLSL_OP1_TRUNC, ret, loc); return true; } -@@ -3220,22 +3475,31 @@ static const struct intrinsic_function +@@ -3220,22 +3653,36 @@ static const struct intrinsic_function intrinsic_functions[] = { /* Note: these entries should be kept in alphabetical order. */ @@ -9798,10 +15717,15 @@ index fd1eaf6ec95..0e07fe578e1 100644 + {"asfloat", 1, true, intrinsic_asfloat}, {"asuint", -1, true, intrinsic_asuint}, {"clamp", 3, true, intrinsic_clamp}, ++ {"clip", 1, true, intrinsic_clip}, {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, + {"ddx", 1, true, intrinsic_ddx}, ++ {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, ++ {"ddx_fine", 1, true, intrinsic_ddx_fine}, + {"ddy", 1, true, intrinsic_ddy}, ++ {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, ++ {"ddy_fine", 1, true, intrinsic_ddy_fine}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, @@ -9819,7 +15743,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 {"max", 2, true, intrinsic_max}, {"min", 2, true, intrinsic_min}, {"mul", 2, true, intrinsic_mul}, -@@ -3245,6 +3509,7 @@ intrinsic_functions[] = +@@ -3245,13 +3692,17 @@ intrinsic_functions[] = {"round", 1, true, intrinsic_round}, {"rsqrt", 1, true, intrinsic_rsqrt}, {"saturate", 1, true, intrinsic_saturate}, @@ -9827,47 +15751,107 @@ index fd1eaf6ec95..0e07fe578e1 100644 {"sin", 1, true, intrinsic_sin}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, -@@ -3252,6 +3517,7 @@ intrinsic_functions[] = + {"step", 2, true, intrinsic_step}, ++ {"tex1D", -1, false, intrinsic_tex1D}, {"tex2D", -1, false, intrinsic_tex2D}, {"tex3D", -1, false, intrinsic_tex3D}, ++ {"texCUBE", -1, false, intrinsic_texCUBE}, {"transpose", 1, true, intrinsic_transpose}, + {"trunc", 1, true, intrinsic_trunc}, }; static int intrinsic_function_name_compare(const void *a, const void *b) -@@ -3291,11 +3557,11 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +@@ 
-3261,7 +3712,7 @@ static int intrinsic_function_name_compare(const void *a, const void *b) + return strcmp(a, func->name); + } - if (param->storage_modifiers & HLSL_STORAGE_IN) - { +-static struct list *add_call(struct hlsl_ctx *ctx, const char *name, ++static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, + struct parse_initializer *args, const struct vkd3d_shader_location *loc) + { + struct intrinsic_function *intrinsic; +@@ -3269,79 +3720,8 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, + + if ((decl = find_function_call(ctx, name, args, loc))) + { +- struct hlsl_ir_node *call; +- unsigned int i; +- +- assert(args->args_count == decl->parameters.count); +- +- for (i = 0; i < decl->parameters.count; ++i) +- { +- struct hlsl_ir_var *param = decl->parameters.vars[i]; +- struct hlsl_ir_node *arg = args->args[i]; +- +- if (!hlsl_types_are_equal(arg->data_type, param->data_type)) +- { +- struct hlsl_ir_node *cast; +- +- if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) +- goto fail; +- args->args[i] = cast; +- arg = cast; +- } +- +- if (param->storage_modifiers & HLSL_STORAGE_IN) +- { - struct hlsl_ir_store *store; -+ struct hlsl_ir_node *store; - - if (!(store = hlsl_new_simple_store(ctx, param, arg))) - goto fail; +- +- if (!(store = hlsl_new_simple_store(ctx, param, arg))) +- goto fail; - list_add_tail(args->instrs, &store->node.entry); -+ list_add_tail(args->instrs, &store->entry); - } - } - -@@ -3316,7 +3582,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, - "Output argument to \"%s\" is const.", decl->func->name); - +- } +- } +- +- if (!(call = hlsl_new_call(ctx, decl, loc))) ++ if (!add_user_call(ctx, decl, args, loc)) + goto fail; +- list_add_tail(args->instrs, &call->entry); +- +- for (i = 0; i < decl->parameters.count; ++i) +- { +- struct hlsl_ir_var *param = decl->parameters.vars[i]; +- 
struct hlsl_ir_node *arg = args->args[i]; +- +- if (param->storage_modifiers & HLSL_STORAGE_OUT) +- { +- struct hlsl_ir_load *load; +- +- if (arg->data_type->modifiers & HLSL_MODIFIER_CONST) +- hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, +- "Output argument to \"%s\" is const.", decl->func->name); +- - if (!(load = hlsl_new_var_load(ctx, param, arg->loc))) -+ if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) - goto fail; - list_add_tail(args->instrs, &load->node.entry); - -@@ -3329,7 +3595,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - { - struct hlsl_ir_load *load; - +- goto fail; +- list_add_tail(args->instrs, &load->node.entry); +- +- if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) +- goto fail; +- } +- } +- +- if (decl->return_var) +- { +- struct hlsl_ir_load *load; +- - if (!(load = hlsl_new_var_load(ctx, decl->return_var, *loc))) -+ if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) - goto fail; - list_add_tail(args->instrs, &load->node.entry); - } -@@ -3360,7 +3626,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, +- goto fail; +- list_add_tail(args->instrs, &load->node.entry); +- } +- else +- { +- struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; +- struct hlsl_ir_node *expr; +- +- if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc))) +- goto fail; +- list_add_tail(args->instrs, &expr->entry); +- } + } + else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), + sizeof(*intrinsic_functions), intrinsic_function_name_compare))) +@@ -3360,7 +3740,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, for (i = 0; i < args->args_count; ++i) { @@ -9876,11 +15860,13 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -3397,20 +3663,20 @@ fail: +@@ -3396,21 +3776,21 @@ fail: + return NULL; } - static struct list 
*add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, +-static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, - struct parse_initializer *params, struct vkd3d_shader_location loc) ++static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, + struct parse_initializer *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_load *load; @@ -9900,15 +15886,20 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -3455,320 +3721,526 @@ static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) +@@ -3426,7 +3806,7 @@ static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type + + if (!(load = hlsl_new_var_load(ctx, var, loc))) + return NULL; +- list_add_tail(params->instrs, &load->node.entry); ++ hlsl_block_add_instr(params->instrs, &load->node); + + vkd3d_free(params->args); + return params->instrs; +@@ -3455,320 +3835,704 @@ static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) } } -static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -- const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) --{ -- const struct hlsl_type *object_type = object->data_type; -- struct hlsl_ir_load *object_load; +static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct hlsl_type *object_type, + const char *method, const struct vkd3d_shader_location *loc) +{ @@ -9921,10 +15912,11 @@ index fd1eaf6ec95..0e07fe578e1 100644 + return false; +} + -+static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ const struct hlsl_type *object_type = object->data_type; ++static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char 
*name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + const struct hlsl_type *object_type = object->data_type; +- struct hlsl_ir_load *object_load; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; @@ -9936,7 +15928,9 @@ index fd1eaf6ec95..0e07fe578e1 100644 + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } -+ + +- if (object_type->type != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE +- || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + @@ -9949,7 +15943,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 + } + if (multisampled) + { -+ if (!(load_params.sample_index = add_implicit_conversion(ctx, instrs, params->args[1], ++ if (!(load_params.sample_index = add_implicit_conversion(ctx, block, params->args[1], + hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) + return false; + } @@ -9957,7 +15951,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 + assert(offset_dim); + if (params->args_count > 1 + multisampled) + { -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[1 + multisampled], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } @@ -9967,7 +15961,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 + } + + /* +1 for the mipmap level for non-multisampled textures */ -+ if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), 
loc))) + return false; + @@ -9976,11 +15970,11 @@ index fd1eaf6ec95..0e07fe578e1 100644 + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; -+ list_add_tail(instrs, &load->entry); ++ hlsl_block_add_instr(block, load); + return true; +} + -+static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; @@ -10007,23 +16001,25 @@ index fd1eaf6ec95..0e07fe578e1 100644 + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) -+ { -+ struct vkd3d_string_buffer *string; -+ + { + struct vkd3d_string_buffer *string; + +- if ((string = hlsl_type_to_string(ctx, object_type))) + if ((string = hlsl_type_to_string(ctx, sampler_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Type '%s' does not have methods.", string->buffer); + "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + -+ if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + + if (offset_dim && params->args_count > 2) + { -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), 
loc))) + return false; + } @@ -10039,12 +16035,12 @@ index fd1eaf6ec95..0e07fe578e1 100644 + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; -+ list_add_tail(instrs, &load->entry); ++ hlsl_block_add_instr(block, load); + + return true; +} + -+static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; @@ -10083,21 +16079,23 @@ index fd1eaf6ec95..0e07fe578e1 100644 + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of %s(): expected 'SamplerComparisonState', but got '%s'.", + name, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return false; -+ } -+ -+ if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_release_string_buffer(ctx, string); + return false; + } + +- /* Only HLSL_IR_LOAD can return an object. 
*/ +- object_load = hlsl_ir_load(object); ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + -+ if (!(load_params.cmp = add_implicit_conversion(ctx, instrs, params->args[2], ++ if (!(load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], + hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) + load_params.cmp = params->args[2]; + + if (offset_dim && params->args_count > 3) + { -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } @@ -10113,12 +16111,12 @@ index fd1eaf6ec95..0e07fe578e1 100644 + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; -+ list_add_tail(instrs, &load->entry); ++ hlsl_block_add_instr(block, load); + + return true; +} + -+static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; @@ -10128,15 +16126,23 @@ index fd1eaf6ec95..0e07fe578e1 100644 + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; + unsigned int read_channel; -+ + +- if (!strcmp(name, "Load") + if (object_type->sampler_dim != HLSL_SAMPLER_DIM_2D + && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DARRAY -+ && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBE -+ && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBEARRAY) -+ { + && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBE + && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBEARRAY) + { +- const unsigned int sampler_dim = 
hlsl_sampler_dim_count(object_type->sampler_dim); +- const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); +- struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; +- struct hlsl_ir_resource_load *load; +- bool multisampled; + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } -+ + +- multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS +- || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + if (!strcmp(name, "GatherGreen")) + { + load_params.type = HLSL_RESOURCE_GATHER_GREEN; @@ -10157,16 +16163,23 @@ index fd1eaf6ec95..0e07fe578e1 100644 + load_params.type = HLSL_RESOURCE_GATHER_RED; + read_channel = 0; + } -+ + +- if (params->args_count < 1 + multisampled || params->args_count > 3 + multisampled) + if (!strcmp(name, "Gather") || !offset_dim) + { + if (params->args_count < 2 || params->args_count > 3 + !!offset_dim) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Wrong number of arguments to method 'Load': expected between %u and %u, but got %u.", +- 1 + multisampled, 3 + multisampled, params->args_count); + "Wrong number of arguments to method '%s': expected from 2 to %u, but got %u.", + name, 3 + !!offset_dim, params->args_count); -+ return false; -+ } + return false; + } +- if (multisampled) +- { +- hlsl_fixme(ctx, loc, "Load() sampling index parameter."); +- } + } + else if (params->args_count < 2 || params->args_count == 5 || params->args_count > 7) + { @@ -10175,73 +16188,6 @@ index fd1eaf6ec95..0e07fe578e1 100644 + name, params->args_count); + return false; + } -+ -+ if (params->args_count == 3 + !!offset_dim || params->args_count == 7) -+ hlsl_fixme(ctx, loc, "Tiled resource status argument."); - -- if (object_type->type != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE -- || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) -+ if 
(params->args_count == 6 || params->args_count == 7) -+ { -+ hlsl_fixme(ctx, loc, "Multiple %s() offset parameters.", name); -+ } -+ else if (offset_dim && params->args_count > 2) -+ { -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -+ return false; -+ } -+ -+ sampler_type = params->args[0]->data_type; -+ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -+ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; - -- if ((string = hlsl_type_to_string(ctx, object_type))) -+ if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Type '%s' does not have methods.", string->buffer); -+ "Wrong type for argument 1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } - -- /* Only HLSL_IR_LOAD can return an object. 
*/ -- object_load = hlsl_ir_load(object); -- -- if (!strcmp(name, "Load") -- && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBE -- && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBEARRAY) -+ if (read_channel >= object_type->e.resource_format->dimx) - { -- const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -- const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -- struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; -- struct hlsl_ir_resource_load *load; -- bool multisampled; -- -- multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -- || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Method %s() requires at least %u channels.", name, read_channel + 1); -+ return false; -+ } - -- if (params->args_count < 1 + multisampled || params->args_count > 3 + multisampled) -- { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Wrong number of arguments to method 'Load': expected between %u and %u, but got %u.", -- 1 + multisampled, 3 + multisampled, params->args_count); -- return false; -- } -- if (multisampled) -- { -- hlsl_fixme(ctx, loc, "Load() sampling index parameter."); -- } -+ if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ return false; - assert(offset_dim); - if (params->args_count > 1 + multisampled) @@ -10254,51 +16200,110 @@ index fd1eaf6ec95..0e07fe578e1 100644 - { - hlsl_fixme(ctx, loc, "Tiled resource status argument."); - } -+ load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); -+ load_params.resource = object; -+ load_params.sampler = params->args[0]; ++ if (params->args_count == 3 + !!offset_dim || params->args_count == 7) ++ hlsl_fixme(ctx, loc, "Tiled resource status argument."); - /* +1 for the mipmap level */ - if 
(!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + 1), loc))) -- return false; -+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ list_add_tail(instrs, &load->entry); -+ return true; -+} ++ if (params->args_count == 6 || params->args_count == 7) ++ { ++ hlsl_fixme(ctx, loc, "Multiple %s() offset parameters.", name); ++ } ++ else if (offset_dim && params->args_count > 2) ++ { ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; ++ } - load_params.format = object_type->e.resource_format; - load_params.resource = object_load->src; -+static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ const struct hlsl_type *object_type = object->data_type; -+ struct hlsl_resource_load_params load_params = { 0 }; -+ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -+ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -+ const struct hlsl_type *sampler_type; -+ struct hlsl_ir_node *load; ++ sampler_type = params->args[0]->data_type; ++ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER ++ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) ++ { ++ struct vkd3d_string_buffer *string; - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); -- return true; -+ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -+ || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) ++ if ((string = hlsl_type_to_string(ctx, sampler_type))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Wrong type for argument 
1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ return false; ++ } ++ ++ if (read_channel >= object_type->e.resource_format->dimx) + { -+ return raise_invalid_method_object_type(ctx, object_type, name, loc); ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Method %s() requires at least %u channels.", name, read_channel + 1); ++ return false; ++ } ++ ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ return false; ++ ++ load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); ++ load_params.resource = object; ++ load_params.sampler = params->args[0]; ++ ++ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) ++ return false; ++ hlsl_block_add_instr(block, load); ++ return true; ++} ++ ++static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *dest, ++ struct hlsl_ir_node *src, unsigned int component, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *load; ++ ++ if (!dest) + return true; ++ ++ if (!(load = hlsl_add_load_component(ctx, instrs, src, component, loc))) ++ return false; ++ ++ if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load)) ++ return false; ++ ++ return true; ++} ++ ++static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, ++ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ const struct hlsl_type *object_type = object->data_type; ++ bool uint_resinfo, has_uint_arg, has_float_arg; ++ struct hlsl_resource_load_params load_params; ++ struct hlsl_ir_node *sample_info, *res_info; ++ struct hlsl_ir_node *zero = NULL, *void_ret; ++ struct hlsl_type *uint_type, *float_type; ++ unsigned int i, j; ++ enum func_argument ++ { ++ 
ARG_MIP_LEVEL, ++ ARG_WIDTH, ++ ARG_HEIGHT, ++ ARG_ELEMENT_COUNT, ++ ARG_LEVEL_COUNT, ++ ARG_SAMPLE_COUNT, ++ ARG_MAX_ARGS, ++ }; ++ struct hlsl_ir_node *args[ARG_MAX_ARGS] = { 0 }; ++ static const struct overload ++ { ++ enum hlsl_sampler_dim sampler_dim; ++ unsigned int args_count; ++ enum func_argument args[ARG_MAX_ARGS]; } - else if (!strcmp(name, "Sample") - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) -+ -+ if (!strcmp(name, "SampleLevel")) -+ load_params.type = HLSL_RESOURCE_SAMPLE_LOD; -+ else -+ load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; -+ -+ if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) ++ overloads[] = { - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); @@ -10306,61 +16311,142 @@ index fd1eaf6ec95..0e07fe578e1 100644 - const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", -+ name, 4 + !!offset_dim, params->args_count); -+ return false; ++ { HLSL_SAMPLER_DIM_1D, 1, { ARG_WIDTH } }, ++ { HLSL_SAMPLER_DIM_1D, 3, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_1DARRAY, 2, { ARG_WIDTH, ARG_ELEMENT_COUNT } }, ++ { HLSL_SAMPLER_DIM_1DARRAY, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_2D, 2, { ARG_WIDTH, ARG_HEIGHT } }, ++ { HLSL_SAMPLER_DIM_2D, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_2DARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, ++ { HLSL_SAMPLER_DIM_2DARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_3D, 3, { ARG_WIDTH, ARG_HEIGHT, 
ARG_ELEMENT_COUNT } }, ++ { HLSL_SAMPLER_DIM_3D, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_CUBE, 2, { ARG_WIDTH, ARG_HEIGHT } }, ++ { HLSL_SAMPLER_DIM_CUBE, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_CUBEARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, ++ { HLSL_SAMPLER_DIM_CUBEARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, ++ { HLSL_SAMPLER_DIM_2DMS, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_SAMPLE_COUNT } }, ++ { HLSL_SAMPLER_DIM_2DMSARRAY, 4, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_SAMPLE_COUNT } }, ++ }; ++ const struct overload *o = NULL; ++ ++ if (object_type->sampler_dim > HLSL_SAMPLER_DIM_LAST_TEXTURE) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "GetDimensions() is not defined for this type."); + } ++ ++ uint_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT); ++ float_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT); ++ has_uint_arg = has_float_arg = false; ++ for (i = 0; i < ARRAY_SIZE(overloads); ++i) ++ { ++ const struct overload *iter = &overloads[i]; - if (params->args_count < 2 || params->args_count > 4 + !!offset_dim) -- { ++ if (iter->sampler_dim == object_type->sampler_dim && iter->args_count == params->args_count) + { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method 'Sample': expected from 2 to %u, but got %u.", - 4 + !!offset_dim, params->args_count); - return false; -- } -+ sampler_type = params->args[0]->data_type; -+ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -+ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) -+ { -+ struct vkd3d_string_buffer *string; ++ for (j = 0; j < params->args_count; ++j) ++ { ++ args[iter->args[j]] = params->args[j]; ++ ++ /* Input parameter. 
*/ ++ if (iter->args[j] == ARG_MIP_LEVEL) ++ { ++ if (!(args[ARG_MIP_LEVEL] = add_implicit_conversion(ctx, block, args[ARG_MIP_LEVEL], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) ++ { ++ return false; ++ } ++ ++ continue; ++ } ++ ++ has_float_arg |= hlsl_types_are_equal(params->args[j]->data_type, float_type); ++ has_uint_arg |= hlsl_types_are_equal(params->args[j]->data_type, uint_type); ++ ++ if (params->args[j]->data_type->class != HLSL_CLASS_SCALAR) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected scalar arguments."); ++ break; ++ } ++ } ++ o = iter; ++ break; + } ++ } ++ uint_resinfo = !has_float_arg && has_uint_arg; - sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; -+ if ((string = hlsl_type_to_string(ctx, sampler_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Wrong type for argument 0 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return false; -+ } ++ if (!o) ++ { ++ struct vkd3d_string_buffer *string; - if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); -- hlsl_release_string_buffer(ctx, string); ++ if ((string = hlsl_type_to_string(ctx, object_type))) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Unexpected number of arguments %u for %s.%s().", params->args_count, string->buffer, name); + hlsl_release_string_buffer(ctx, string); - return false; -- } -+ if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ load_params.coords = params->args[1]; + } ++ } ++ ++ if 
(!args[ARG_MIP_LEVEL]) ++ { ++ if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) ++ return false; ++ hlsl_block_add_instr(block, zero); ++ args[ARG_MIP_LEVEL] = zero; ++ } ++ ++ memset(&load_params, 0, sizeof(load_params)); ++ load_params.type = HLSL_RESOURCE_RESINFO; ++ load_params.resource = object; ++ load_params.lod = args[ARG_MIP_LEVEL]; ++ load_params.format = hlsl_get_vector_type(ctx, uint_resinfo ? HLSL_TYPE_UINT : HLSL_TYPE_FLOAT, 4); ++ ++ if (!(res_info = hlsl_new_resource_load(ctx, &load_params, loc))) ++ return false; ++ hlsl_block_add_instr(block, res_info); ++ ++ if (!add_assignment_from_component(ctx, block, args[ARG_WIDTH], res_info, 0, loc)) ++ return false; ++ ++ if (!add_assignment_from_component(ctx, block, args[ARG_HEIGHT], res_info, 1, loc)) ++ return false; ++ ++ if (!add_assignment_from_component(ctx, block, args[ARG_ELEMENT_COUNT], res_info, ++ object_type->sampler_dim == HLSL_SAMPLER_DIM_1DARRAY ? 1 : 2, loc)) ++ { ++ return false; ++ } - /* Only HLSL_IR_LOAD can return an object. 
*/ - sampler_load = hlsl_ir_load(params->args[0]); -+ if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) -+ load_params.lod = params->args[2]; ++ if (!add_assignment_from_component(ctx, block, args[ARG_LEVEL_COUNT], res_info, 3, loc)) ++ return false; - if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ if (offset_dim && params->args_count > 3) ++ if (args[ARG_SAMPLE_COUNT]) + { -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) ++ memset(&load_params, 0, sizeof(load_params)); ++ load_params.type = HLSL_RESOURCE_SAMPLE_INFO; ++ load_params.resource = object; ++ load_params.format = args[ARG_SAMPLE_COUNT]->data_type; ++ if (!(sample_info = hlsl_new_resource_load(ctx, &load_params, loc))) return false; -+ } ++ hlsl_block_add_instr(block, sample_info); - if (offset_dim && params->args_count > 2) - { @@ -10368,30 +16454,18 @@ index fd1eaf6ec95..0e07fe578e1 100644 - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -+ if (params->args_count > 3 + !!offset_dim) -+ hlsl_fixme(ctx, loc, "Tiled resource status argument."); - -- if (params->args_count > 2 + !!offset_dim) -- hlsl_fixme(ctx, loc, "Sample() clamp parameter."); -- if (params->args_count > 3 + !!offset_dim) -- hlsl_fixme(ctx, loc, "Tiled resource status argument."); -+ load_params.format = object_type->e.resource_format; -+ load_params.resource = object; -+ load_params.sampler = params->args[0]; - -- load_params.format = object_type->e.resource_format; -- load_params.resource = object_load->src; -- load_params.sampler = sampler_load->src; -+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) ++ if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info)) ++ return 
false; ++ } ++ ++ if (!(void_ret = hlsl_new_void_expr(ctx, loc))) + return false; -+ list_add_tail(instrs, &load->entry); ++ hlsl_block_add_instr(block, void_ret); ++ + return true; +} - -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- list_add_tail(instrs, &load->node.entry); -+static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++ ++static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; @@ -10400,18 +16474,71 @@ index fd1eaf6ec95..0e07fe578e1 100644 + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; ++ ++ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS ++ || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) ++ { ++ return raise_invalid_method_object_type(ctx, object_type, name, loc); ++ } ++ ++ if (!strcmp(name, "SampleLevel")) ++ load_params.type = HLSL_RESOURCE_SAMPLE_LOD; ++ else ++ load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; ++ ++ if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", ++ name, 4 + !!offset_dim, params->args_count); ++ return false; ++ } ++ ++ sampler_type = params->args[0]->data_type; ++ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER ++ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, sampler_type))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Wrong type for argument 0 of %s(): 
expected 'sampler', but got '%s'.", name, string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ return false; ++ } +- if (params->args_count > 2 + !!offset_dim) +- hlsl_fixme(ctx, loc, "Sample() clamp parameter."); +- if (params->args_count > 3 + !!offset_dim) +- hlsl_fixme(ctx, loc, "Tiled resource status argument."); ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ load_params.coords = params->args[1]; + +- load_params.format = object_type->e.resource_format; +- load_params.resource = object_load->src; +- load_params.sampler = sampler_load->src; ++ if (!(load_params.lod = add_implicit_conversion(ctx, block, params->args[2], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) ++ load_params.lod = params->args[2]; + +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) ++ if (offset_dim && params->args_count > 3) ++ { ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; +- list_add_tail(instrs, &load->node.entry); +- - return true; -- } + } - else if ((!strcmp(name, "Gather") || !strcmp(name, "GatherRed") || !strcmp(name, "GatherBlue") - || !strcmp(name, "GatherGreen") || !strcmp(name, "GatherAlpha")) - && (object_type->sampler_dim == HLSL_SAMPLER_DIM_2D - || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DARRAY - || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBE - || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBEARRAY)) -+ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -+ || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) - { +- { - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - struct hlsl_resource_load_params load_params = {0}; @@ -10419,7 +16546,7 @@ index fd1eaf6ec95..0e07fe578e1 
100644 - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; - unsigned int read_channel; -- + - if (!strcmp(name, "GatherGreen")) - { - load_params.type = HLSL_RESOURCE_GATHER_GREEN; @@ -10440,7 +16567,9 @@ index fd1eaf6ec95..0e07fe578e1 100644 - load_params.type = HLSL_RESOURCE_GATHER_RED; - read_channel = 0; - } -- ++ if (params->args_count > 3 + !!offset_dim) ++ hlsl_fixme(ctx, loc, "Tiled resource status argument."); + - if (!strcmp(name, "Gather") || !offset_dim) - { - if (params->args_count < 2 || params->args_count > 3 + !!offset_dim) @@ -10458,11 +16587,17 @@ index fd1eaf6ec95..0e07fe578e1 100644 - name, params->args_count); - return false; - } -- ++ load_params.format = object_type->e.resource_format; ++ load_params.resource = object; ++ load_params.sampler = params->args[0]; + - if (params->args_count == 3 + !!offset_dim || params->args_count == 7) - hlsl_fixme(ctx, loc, "Tiled resource status argument."); -+ return raise_invalid_method_object_type(ctx, object_type, name, loc); -+ } ++ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) ++ return false; ++ hlsl_block_add_instr(block, load); ++ return true; ++} - if (params->args_count == 6 || params->args_count == 7) - { @@ -10474,19 +16609,25 @@ index fd1eaf6ec95..0e07fe578e1 100644 - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -+ load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; ++static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, ++ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ const struct hlsl_type *object_type = object->data_type; ++ struct hlsl_resource_load_params load_params = { 0 }; ++ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); ++ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); ++ const struct hlsl_type *sampler_type; ++ struct 
hlsl_ir_node *load; - sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; -+ if (params->args_count < 4 || params->args_count > 5 + !!offset_dim) ++ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS ++ || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method '%s': expected from 4 to %u, but got %u.", -+ name, 5 + !!offset_dim, params->args_count); -+ return false; ++ return raise_invalid_method_object_type(ctx, object_type, name, loc); + } - if ((string = hlsl_type_to_string(ctx, sampler_type))) @@ -10495,14 +16636,24 @@ index fd1eaf6ec95..0e07fe578e1 100644 - hlsl_release_string_buffer(ctx, string); - return false; - } ++ load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; + +- if (read_channel >= object_type->e.resource_format->dimx) +- { ++ if (params->args_count < 4 || params->args_count > 5 + !!offset_dim) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to method '%s': expected from 4 to %u, but got %u.", ++ name, 5 + !!offset_dim, params->args_count); ++ return false; ++ } ++ + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; - -- if (read_channel >= object_type->e.resource_format->dimx) -- { ++ + if ((string = hlsl_type_to_string(ctx, sampler_type))) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Method %s() requires at least %u channels.", name, read_channel + 1); @@ -10515,57 +16666,62 @@ index fd1eaf6ec95..0e07fe578e1 100644 - /* Only HLSL_IR_LOAD can return an object. 
*/ - sampler_load = hlsl_ir_load(params->args[0]); -+ if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.coords = params->args[1]; - if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - return false; -+ if (!(load_params.ddx = add_implicit_conversion(ctx, instrs, params->args[2], ++ if (!(load_params.ddx = add_implicit_conversion(ctx, block, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.ddx = params->args[2]; - load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); - load_params.resource = object_load->src; - load_params.sampler = sampler_load->src; -+ if (!(load_params.ddy = add_implicit_conversion(ctx, instrs, params->args[3], ++ if (!(load_params.ddy = add_implicit_conversion(ctx, block, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.ddy = params->args[3]; - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; ++ if (offset_dim && params->args_count > 4) ++ { ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[4], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; - list_add_tail(instrs, &load->node.entry); - return true; -- } + } - else if (!strcmp(name, "SampleLevel") - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) -+ if (offset_dim && params->args_count > 4) - { +- { - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE_LOD}; - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = 
hlsl_offset_dim_count(object_type->sampler_dim); - const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; -- + - if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method 'SampleLevel': expected from 3 to %u, but got %u.", - 4 + !!offset_dim, params->args_count); -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[4], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; +- return false; - } -- ++ if (params->args_count > 4 + !!offset_dim) ++ hlsl_fixme(ctx, loc, "Tiled resource status argument."); + - sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; -+ } ++ load_params.format = object_type->e.resource_format; ++ load_params.resource = object; ++ load_params.sampler = params->args[0]; - if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, @@ -10573,31 +16729,16 @@ index fd1eaf6ec95..0e07fe578e1 100644 - hlsl_release_string_buffer(ctx, string); - return false; - } -+ if (params->args_count > 4 + !!offset_dim) -+ hlsl_fixme(ctx, loc, "Tiled resource status argument."); - -- /* Only HLSL_IR_LOAD can return an object. 
*/ -- sampler_load = hlsl_ir_load(params->args[0]); -+ load_params.format = object_type->e.resource_format; -+ load_params.resource = object; -+ load_params.sampler = params->args[0]; - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- load_params.coords = params->args[1]; + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; -+ list_add_tail(instrs, &load->entry); ++ hlsl_block_add_instr(block, load); + return true; +} - -- if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], -- hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) -- load_params.lod = params->args[2]; ++ +static const struct method_function +{ + const char *name; -+ bool (*handler)(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++ bool (*handler)(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); +} +object_methods[] = @@ -10607,9 +16748,19 @@ index fd1eaf6ec95..0e07fe578e1 100644 + { "GatherBlue", add_gather_method_call }, + { "GatherGreen", add_gather_method_call }, + { "GatherRed", add_gather_method_call }, -+ + +- /* Only HLSL_IR_LOAD can return an object. 
*/ +- sampler_load = hlsl_ir_load(params->args[0]); ++ { "GetDimensions", add_getdimensions_method_call }, + +- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- load_params.coords = params->args[1]; + { "Load", add_load_method_call }, -+ + +- if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], +- hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) +- load_params.lod = params->args[2]; + { "Sample", add_sample_method_call }, + { "SampleBias", add_sample_lod_method_call }, + { "SampleCmp", add_sample_cmp_method_call }, @@ -10636,7 +16787,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 - load_params.format = object_type->e.resource_format; - load_params.resource = object_load->src; - load_params.sampler = sampler_load->src; -+static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; @@ -10665,7 +16816,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 + if ((method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), + sizeof(*method), object_method_function_name_compare))) + { -+ return method->handler(ctx, instrs, object, name, params, loc); ++ return method->handler(ctx, block, object, name, params, loc); + } + else + { @@ -10681,7 +16832,15 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -3846,6 +4318,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -3800,6 +4564,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type + char *name; + DWORD modifiers; + struct hlsl_ir_node *instr; ++ struct hlsl_block *block; + struct list *list; + struct 
parse_fields fields; + struct parse_function function; +@@ -3846,6 +4611,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %token KW_NAMESPACE %token KW_NOINTERPOLATION %token KW_OUT @@ -10689,7 +16848,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 %token KW_PASS %token KW_PIXELSHADER %token KW_PRECISE -@@ -3854,6 +4327,8 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -3854,6 +4620,8 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %token KW_RETURN %token KW_REGISTER %token KW_ROW_MAJOR @@ -10698,23 +16857,83 @@ index fd1eaf6ec95..0e07fe578e1 100644 %token KW_RWTEXTURE1D %token KW_RWTEXTURE2D %token KW_RWTEXTURE3D -@@ -3933,6 +4408,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %type conditional_expr - %type declaration - %type declaration_statement -+%type discard_statement - %type equality_expr - %type expr - %type expr_optional -@@ -3968,6 +4444,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -3924,37 +4692,9 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type + %token C_INTEGER + %token PRE_LINE + +-%type add_expr +-%type assignment_expr +-%type bitand_expr +-%type bitor_expr +-%type bitxor_expr +-%type compound_statement +-%type conditional_expr +-%type declaration +-%type declaration_statement +-%type equality_expr +-%type expr +-%type expr_optional +-%type expr_statement +-%type initializer_expr +-%type jump_statement +-%type logicand_expr +-%type logicor_expr +-%type loop_statement +-%type mul_expr +-%type postfix_expr +-%type primary_expr +-%type relational_expr +-%type selection_statement +-%type shift_expr +-%type statement +-%type statement_list +-%type struct_declaration + %type type_specs +-%type unary_expr + %type variables_def +-%type variables_def_optional ++%type variables_def_typed + + %token VAR_IDENTIFIER + %token NEW_IDENTIFIER 
+@@ -3968,6 +4708,36 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type attribute %type attribute_list +%type attribute_list_optional ++ ++%type add_expr ++%type assignment_expr ++%type bitand_expr ++%type bitor_expr ++%type bitxor_expr ++%type compound_statement ++%type conditional_expr ++%type declaration ++%type declaration_statement ++%type equality_expr ++%type expr ++%type expr_optional ++%type expr_statement ++%type initializer_expr ++%type jump_statement ++%type logicand_expr ++%type logicor_expr ++%type loop_statement ++%type mul_expr ++%type postfix_expr ++%type primary_expr ++%type relational_expr ++%type shift_expr ++%type selection_statement ++%type statement ++%type statement_list ++%type struct_declaration_without_vars ++%type unary_expr %type boolean -@@ -3999,6 +4476,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type +@@ -3999,6 +4769,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type parameters %type register_opt @@ -10722,7 +16941,27 @@ index fd1eaf6ec95..0e07fe578e1 100644 %type texture_type texture_ms_type uav_type -@@ -4037,7 +4515,7 @@ buffer_declaration: +@@ -4015,6 +4786,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type + %type type_spec + %type variable_decl + %type variable_def ++%type variable_def_typed + + %% + +@@ -4024,9 +4796,9 @@ hlsl_prog: + | hlsl_prog buffer_declaration buffer_body + | hlsl_prog declaration_statement + { +- if (!list_empty($2)) ++ if (!list_empty(&$2->instrs)) + hlsl_fixme(ctx, &@2, "Uniform initializer."); +- destroy_instr_list($2); ++ destroy_block($2); + } + | hlsl_prog preproc_directive + | hlsl_prog ';' +@@ -4037,7 +4809,7 @@ buffer_declaration: if ($3.semantic.name) hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); @@ -10731,7 +16970,62 @@ index fd1eaf6ec95..0e07fe578e1 100644 YYABORT; } -@@ -4261,6 +4739,14 
@@ attribute_list: +@@ -4083,25 +4855,19 @@ preproc_directive: + } + } + +-struct_declaration: +- var_modifiers struct_spec variables_def_optional ';' ++struct_declaration_without_vars: ++ var_modifiers struct_spec ';' + { +- struct hlsl_type *type; +- unsigned int modifiers = $1; ++ if (!$2->name) ++ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Anonymous struct type must declare a variable."); + +- if (!$3) +- { +- if (!$2->name) +- hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, +- "Anonymous struct type must declare a variable."); +- if (modifiers) +- hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, +- "Modifiers are not allowed on struct type declarations."); +- } ++ if ($1) ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Modifiers are not allowed on struct type declarations."); + +- if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) ++ if (!($$ = make_empty_block(ctx))) + YYABORT; +- $$ = declare_vars(ctx, type, modifiers, &@1, $3); + } + + struct_spec: +@@ -4208,7 +4974,7 @@ attribute: + YYABORT; + } + $$->name = $2; +- list_init(&$$->instrs); ++ hlsl_block_init(&$$->instrs); + $$->loc = @$; + $$->args_count = 0; + } +@@ -4223,8 +4989,8 @@ attribute: + YYABORT; + } + $$->name = $2; +- list_init(&$$->instrs); +- list_move_tail(&$$->instrs, $4.instrs); ++ hlsl_block_init(&$$->instrs); ++ hlsl_block_add_block(&$$->instrs, $4.instrs); + vkd3d_free($4.instrs); + $$->loc = @$; + $$->args_count = $4.args_count; +@@ -4261,6 +5027,14 @@ attribute_list: $$.attrs[$$.count++] = $2; } @@ -10746,7 +17040,36 @@ index fd1eaf6ec95..0e07fe578e1 100644 func_declaration: func_prototype compound_statement { -@@ -4349,8 +4835,11 @@ func_prototype_no_attrs: +@@ -4272,15 +5046,15 @@ func_declaration: + "Function \"%s\" is already defined.", decl->func->name); + hlsl_note(ctx, &decl->loc, VKD3D_SHADER_LOG_ERROR, + "\"%s\" was previously defined here.", decl->func->name); +- hlsl_free_instr_list($2); 
++ destroy_block($2); + } + else + { + size_t i; + + decl->has_body = true; +- list_move_tail(&decl->body.instrs, $2); +- vkd3d_free($2); ++ hlsl_block_add_block(&decl->body, $2); ++ destroy_block($2); + + /* Semantics are taken from whichever definition has a body. + * We can't just replace the hlsl_ir_var pointers, though: if +@@ -4331,6 +5105,9 @@ func_prototype_no_attrs: + struct hlsl_ir_var *var; + struct hlsl_type *type; + ++ /* Functions are unconditionally inlined. */ ++ modifiers &= ~HLSL_MODIFIER_INLINE; ++ + if (modifiers & ~HLSL_MODIFIERS_MAJORITY_MASK) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Only majority modifiers are allowed on functions."); +@@ -4349,8 +5126,11 @@ func_prototype_no_attrs: "Semantics are not allowed on void functions."); } @@ -10759,7 +17082,16 @@ index fd1eaf6ec95..0e07fe578e1 100644 if (($$.decl = get_func_decl(&ctx->functions, $3, &$5))) { -@@ -4476,17 +4965,24 @@ var_identifier: +@@ -4454,7 +5234,7 @@ func_prototype: + compound_statement: + '{' '}' + { +- if (!($$ = make_empty_list(ctx))) ++ if (!($$ = make_empty_block(ctx))) + YYABORT; + } + | '{' scope_start statement_list '}' +@@ -4476,17 +5256,24 @@ var_identifier: colon_attribute: %empty { @@ -10788,7 +17120,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 $$.reg_reservation = $1; } -@@ -4499,6 +4995,9 @@ semantic: +@@ -4499,6 +5286,9 @@ semantic: ; $$.name = $2; $$.index = atoi(p); @@ -10798,7 +17130,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 *p = 0; } -@@ -4518,6 +5017,21 @@ register_opt: +@@ -4518,6 +5308,21 @@ register_opt: vkd3d_free($6); } @@ -10820,7 +17152,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 parameters: scope_start { -@@ -4536,7 +5050,7 @@ param_list: +@@ -4536,7 +5341,7 @@ param_list: parameter { memset(&$$, 0, sizeof($$)); @@ -10829,7 +17161,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { ERR("Error adding function parameter %s.\n", $1.name); YYABORT; -@@ -4545,7 +5059,7 @@ param_list: +@@ -4545,7 +5350,7 @@ param_list: | param_list ',' 
parameter { $$ = $1; @@ -10838,7 +17170,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "Parameter \"%s\" is already declared.", $3.name); -@@ -4624,7 +5138,15 @@ texture_ms_type: +@@ -4624,7 +5429,15 @@ texture_ms_type: } uav_type: @@ -10855,7 +17187,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { $$ = HLSL_SAMPLER_DIM_1D; } -@@ -4640,7 +5162,7 @@ uav_type: +@@ -4640,7 +5453,7 @@ uav_type: type_no_void: KW_VECTOR '<' type ',' C_INTEGER '>' { @@ -10864,7 +17196,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -4667,7 +5189,7 @@ type_no_void: +@@ -4667,7 +5480,7 @@ type_no_void: } | KW_MATRIX '<' type ',' C_INTEGER ',' C_INTEGER '>' { @@ -10873,7 +17205,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -4702,6 +5224,10 @@ type_no_void: +@@ -4702,6 +5515,10 @@ type_no_void: { $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_GENERIC]; } @@ -10884,7 +17216,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 | KW_SAMPLER1D { $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_1D]; -@@ -4716,7 +5242,7 @@ type_no_void: +@@ -4716,7 +5533,7 @@ type_no_void: } | KW_SAMPLERCUBE { @@ -10893,7 +17225,18 @@ index fd1eaf6ec95..0e07fe578e1 100644 } | KW_TEXTURE { -@@ -4740,23 +5266,58 @@ type_no_void: +@@ -4735,28 +5552,68 @@ type_no_void: + { + validate_texture_format_type(ctx, $3, &@3); + +- /* TODO: unspecified sample count is not allowed for all targets */ ++ if (shader_profile_version_lt(ctx, 4, 1)) ++ { ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); ++ } ++ + $$ = hlsl_new_texture_type(ctx, $1, $3, 0); } | texture_ms_type '<' type ',' shift_expr '>' { @@ -10903,7 +17246,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 + struct hlsl_block block; + + hlsl_block_init(&block); -+ list_move_tail(&block.instrs, $5); ++ hlsl_block_add_block(&block, $5); + + 
sample_count = evaluate_static_expression_as_uint(ctx, &block, &@5); + @@ -10960,7 +17303,16 @@ index fd1eaf6ec95..0e07fe578e1 100644 $$ = hlsl_new_uav_type(ctx, $1, $3); } | TYPE_IDENTIFIER -@@ -4779,7 +5340,7 @@ type_no_void: +@@ -4764,7 +5621,7 @@ type_no_void: + $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); + if ($$->is_minimum_precision) + { +- if (ctx->profile->major_version < 4) ++ if (shader_profile_version_lt(ctx, 4, 0)) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Target profile doesn't support minimum-precision types."); +@@ -4779,7 +5636,7 @@ type_no_void: | KW_STRUCT TYPE_IDENTIFIER { $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); @@ -10969,27 +17321,142 @@ index fd1eaf6ec95..0e07fe578e1 100644 hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "\"%s\" redefined as a structure.", $2); vkd3d_free($2); } -@@ -4934,10 +5495,17 @@ arrays: +@@ -4793,10 +5650,10 @@ type: + + declaration_statement: + declaration +- | struct_declaration ++ | struct_declaration_without_vars + | typedef + { +- if (!($$ = make_empty_list(ctx))) ++ if (!($$ = make_empty_block(ctx))) + YYABORT; + } + +@@ -4855,22 +5712,11 @@ type_spec: + } + + declaration: +- var_modifiers type variables_def ';' ++ variables_def_typed ';' + { +- struct hlsl_type *type; +- unsigned int modifiers = $1; +- +- if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) ++ if (!($$ = initialize_vars(ctx, $1))) + YYABORT; +- $$ = declare_vars(ctx, type, modifiers, &@1, $3); +- } +- +-variables_def_optional: +- %empty +- { +- $$ = NULL; + } +- | variables_def + + variables_def: + variable_def +@@ -4885,6 +5731,33 @@ variables_def: + list_add_tail($$, &$3->entry); + } + ++variables_def_typed: ++ variable_def_typed ++ { ++ if (!($$ = make_empty_list(ctx))) ++ YYABORT; ++ list_add_head($$, &$1->entry); ++ ++ declare_var(ctx, $1); ++ } ++ | variables_def_typed ',' variable_def ++ { ++ struct parse_variable_def *head_def; ++ ++ assert(!list_empty($1)); ++ head_def 
= LIST_ENTRY(list_head($1), struct parse_variable_def, entry); ++ ++ assert(head_def->basic_type); ++ $3->basic_type = head_def->basic_type; ++ $3->modifiers = head_def->modifiers; ++ $3->modifiers_loc = head_def->modifiers_loc; ++ ++ declare_var(ctx, $3); ++ ++ $$ = $1; ++ list_add_tail($$, &$3->entry); ++ } ++ + variable_decl: + any_identifier arrays colon_attribute + { +@@ -4900,7 +5773,7 @@ state: + any_identifier '=' expr ';' + { + vkd3d_free($1); +- hlsl_free_instr_list($3); ++ destroy_block($3); + } + + state_block_start: +@@ -4926,6 +5799,38 @@ variable_def: + ctx->in_state_block = 0; + } + ++variable_def_typed: ++ var_modifiers struct_spec variable_def ++ { ++ unsigned int modifiers = $1; ++ struct hlsl_type *type; ++ ++ if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) ++ YYABORT; ++ ++ check_invalid_in_out_modifiers(ctx, modifiers, &@1); ++ ++ $$ = $3; ++ $$->basic_type = type; ++ $$->modifiers = modifiers; ++ $$->modifiers_loc = @1; ++ } ++ | var_modifiers type variable_def ++ { ++ unsigned int modifiers = $1; ++ struct hlsl_type *type; ++ ++ if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) ++ YYABORT; ++ ++ check_invalid_in_out_modifiers(ctx, modifiers, &@1); ++ ++ $$ = $3; ++ $$->basic_type = type; ++ $$->modifiers = modifiers; ++ $$->modifiers_loc = @1; ++ } ++ + arrays: + %empty + { +@@ -4934,10 +5839,12 @@ arrays: } | '[' expr ']' arrays { - unsigned int size = evaluate_static_expression(node_from_list($2)); -+ struct hlsl_block block; uint32_t *new_array; + unsigned int size; - destroy_instr_list($2); -+ hlsl_clone_block(ctx, &block, &ctx->static_initializers); -+ list_move_tail(&block.instrs, $2); ++ size = evaluate_static_expression_as_uint(ctx, $2, &@2); + -+ size = evaluate_static_expression_as_uint(ctx, &block, &@2); -+ -+ hlsl_block_cleanup(&block); -+ vkd3d_free($2); ++ destroy_block($2); $$ = $4; -@@ -4988,59 +5556,59 @@ var_modifiers: +@@ -4988,59 +5895,63 @@ var_modifiers: } | KW_EXTERN var_modifiers 
{ @@ -11060,72 +17527,155 @@ index fd1eaf6ec95..0e07fe578e1 100644 { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, &@1); ++ } ++ | KW_INLINE var_modifiers ++ { ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); } -@@ -5145,6 +5713,7 @@ statement: - declaration_statement - | expr_statement - | compound_statement -+ | discard_statement - | jump_statement - | selection_statement - | loop_statement -@@ -5152,7 +5721,7 @@ statement: +@@ -5050,10 +5961,10 @@ complex_initializer: + $$.args_count = 1; + if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) + { +- destroy_instr_list($1); ++ destroy_block($1); + YYABORT; + } +- $$.args[0] = node_from_list($1); ++ $$.args[0] = node_from_block($1); + $$.instrs = $1; + $$.braces = false; + } +@@ -5085,7 +5996,7 @@ complex_initializer_list: + $$.args = new_args; + for (i = 0; i < $3.args_count; ++i) + $$.args[$$.args_count++] = $3.args[i]; +- list_move_tail($$.instrs, $3.instrs); ++ hlsl_block_add_block($$.instrs, $3.instrs); + free_parse_initializer(&$3); + } + +@@ -5098,10 +6009,10 @@ initializer_expr_list: + $$.args_count = 1; + if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) + { +- destroy_instr_list($1); ++ destroy_block($1); + YYABORT; + } +- $$.args[0] = node_from_list($1); ++ $$.args[0] = node_from_block($1); + $$.instrs = $1; + $$.braces = false; + } +@@ -5113,13 +6024,13 @@ initializer_expr_list: + if (!(new_args = hlsl_realloc(ctx, $$.args, ($$.args_count + 1) * sizeof(*$$.args)))) + { + free_parse_initializer(&$$); +- destroy_instr_list($3); ++ destroy_block($3); + YYABORT; + } + $$.args = new_args; +- $$.args[$$.args_count++] = node_from_list($3); +- list_move_tail($$.instrs, $3); +- vkd3d_free($3); ++ $$.args[$$.args_count++] = node_from_block($3); ++ hlsl_block_add_block($$.instrs, $3); ++ destroy_block($3); + } + + boolean: +@@ -5137,8 +6048,8 @@ statement_list: + | statement_list statement + { + $$ = $1; +- 
list_move_tail($$, $2); +- vkd3d_free($2); ++ hlsl_block_add_block($$, $2); ++ destroy_block($2); + } + + statement: +@@ -5152,80 +6063,116 @@ statement: jump_statement: KW_RETURN expr ';' { - if (!add_return(ctx, $2, node_from_list($2), @1)) -+ if (!add_return(ctx, $2, node_from_list($2), &@1)) - YYABORT; +- YYABORT; $$ = $2; ++ if (!add_return(ctx, $$, node_from_block($$), &@1)) ++ YYABORT; } -@@ -5160,65 +5729,81 @@ jump_statement: + | KW_RETURN ';' { - if (!($$ = make_empty_list(ctx))) +- if (!($$ = make_empty_list(ctx))) ++ if (!($$ = make_empty_block(ctx))) ++ YYABORT; ++ if (!add_return(ctx, $$, NULL, &@1)) YYABORT; - if (!add_return(ctx, $$, NULL, @1)) -+ if (!add_return(ctx, $$, NULL, &@1)) -+ YYABORT; + } -+ -+discard_statement: -+ KW_DISCARD ';' ++ | KW_DISCARD ';' + { -+ struct hlsl_ir_node *discard; ++ struct hlsl_ir_node *discard, *c; + -+ if (!($$ = make_empty_list(ctx))) ++ if (!($$ = make_empty_block(ctx))) YYABORT; -+ if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) ++ ++ if (!(c = hlsl_new_uint_constant(ctx, ~0u, &@1))) + return false; -+ list_add_tail($$, &discard->entry); ++ hlsl_block_add_instr($$, c); ++ ++ if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NZ, c, &@1))) ++ return false; ++ hlsl_block_add_instr($$, discard); } selection_statement: - KW_IF '(' expr ')' if_body +- KW_IF '(' expr ')' if_body ++ attribute_list_optional KW_IF '(' expr ')' if_body { - struct hlsl_ir_node *condition = node_from_list($3); +- struct hlsl_ir_node *condition = node_from_list($3); - struct hlsl_ir_if *instr; -- -- if (!(instr = hlsl_new_if(ctx, condition, @1))) -+ struct hlsl_block then_block, else_block; ++ struct hlsl_ir_node *condition = node_from_block($4); ++ const struct parse_attribute_list *attributes = &$1; + struct hlsl_ir_node *instr; ++ unsigned int i; + -+ hlsl_block_init(&then_block); -+ list_move_tail(&then_block.instrs, $5.then_block); -+ hlsl_block_init(&else_block); -+ if ($5.else_block) -+ 
list_move_tail(&else_block.instrs, $5.else_block); -+ vkd3d_free($5.then_block); -+ vkd3d_free($5.else_block); ++ if (attribute_list_has_duplicates(attributes)) ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); + -+ if (!(instr = hlsl_new_if(ctx, condition, &then_block, &else_block, &@1))) ++ for (i = 0; i < attributes->count; ++i) ++ { ++ const struct hlsl_attribute *attr = attributes->attrs[i]; ++ ++ if (!strcmp(attr->name, "branch") ++ || !strcmp(attr->name, "flatten")) ++ { ++ hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, "Unhandled attribute '%s'.", attr->name); ++ } ++ else ++ { ++ hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, "Unrecognized attribute '%s'.", attr->name); ++ } ++ } + +- if (!(instr = hlsl_new_if(ctx, condition, @1))) ++ if (!(instr = hlsl_new_if(ctx, condition, $6.then_block, $6.else_block, &@2))) ++ { ++ destroy_block($6.then_block); ++ destroy_block($6.else_block); YYABORT; - list_move_tail(&instr->then_instrs.instrs, $5.then_instrs); - if ($5.else_instrs) - list_move_tail(&instr->else_instrs.instrs, $5.else_instrs); - vkd3d_free($5.then_instrs); - vkd3d_free($5.else_instrs); ++ } ++ destroy_block($6.then_block); ++ destroy_block($6.else_block); if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) { struct vkd3d_string_buffer *string; @@ -11136,9 +17686,10 @@ index fd1eaf6ec95..0e07fe578e1 100644 "if condition type %s is not scalar.", string->buffer); hlsl_release_string_buffer(ctx, string); } - $$ = $3; +- $$ = $3; - list_add_tail($$, &instr->node.entry); -+ list_add_tail($$, &instr->entry); ++ $$ = $4; ++ hlsl_block_add_instr($$, instr); } if_body: @@ -11185,7 +17736,24 @@ index fd1eaf6ec95..0e07fe578e1 100644 hlsl_pop_scope(ctx); } -@@ -5250,31 +5835,31 @@ func_arguments: + expr_optional: + %empty + { +- if (!($$ = make_empty_list(ctx))) ++ if (!($$ = make_empty_block(ctx))) + YYABORT; + } + | expr +@@ -5241,7 +6188,7 @@ 
func_arguments: + { + $$.args = NULL; + $$.args_count = 0; +- if (!($$.instrs = make_empty_list(ctx))) ++ if (!($$.instrs = make_empty_block(ctx))) + YYABORT; + $$.braces = false; + } +@@ -5250,31 +6197,31 @@ func_arguments: primary_expr: C_FLOAT { @@ -11195,7 +17763,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 if (!(c = hlsl_new_float_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, &c->node))) -+ if (!($$ = make_list(ctx, c))) ++ if (!($$ = make_block(ctx, c))) YYABORT; } | C_INTEGER @@ -11206,7 +17774,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 if (!(c = hlsl_new_int_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, &c->node))) -+ if (!($$ = make_list(ctx, c))) ++ if (!($$ = make_block(ctx, c))) YYABORT; } | boolean @@ -11217,59 +17785,72 @@ index fd1eaf6ec95..0e07fe578e1 100644 if (!(c = hlsl_new_bool_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, &c->node))) -+ if (!($$ = make_list(ctx, c))) ++ if (!($$ = make_block(ctx, c))) { - hlsl_free_instr(&c->node); + hlsl_free_instr(c); YYABORT; } } -@@ -5288,7 +5873,7 @@ primary_expr: +@@ -5288,9 +6235,9 @@ primary_expr: hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Variable \"%s\" is not defined.", $1); YYABORT; } - if (!(load = hlsl_new_var_load(ctx, var, @1))) + if (!(load = hlsl_new_var_load(ctx, var, &@1))) YYABORT; - if (!($$ = make_list(ctx, &load->node))) +- if (!($$ = make_list(ctx, &load->node))) ++ if (!($$ = make_block(ctx, &load->node))) YYABORT; -@@ -5316,7 +5901,7 @@ primary_expr: + } + | '(' expr ')' +@@ -5316,9 +6263,9 @@ primary_expr: if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr", hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) YYABORT; - if (!(load = hlsl_new_var_load(ctx, var, @1))) + if (!(load = hlsl_new_var_load(ctx, var, &@1))) YYABORT; - if (!($$ = make_list(ctx, &load->node))) +- if (!($$ = make_list(ctx, &load->node))) ++ if (!($$ = make_block(ctx, &load->node))) YYABORT; -@@ -5332,7 +5917,7 @@ postfix_expr: + } + else +@@ 
-5332,27 +6279,27 @@ postfix_expr: primary_expr | postfix_expr OP_INC { - if (!add_increment(ctx, $1, false, true, @2)) + if (!add_increment(ctx, $1, false, true, &@2)) { - destroy_instr_list($1); +- destroy_instr_list($1); ++ destroy_block($1); YYABORT; -@@ -5341,7 +5926,7 @@ postfix_expr: + } + $$ = $1; } | postfix_expr OP_DEC { - if (!add_increment(ctx, $1, true, true, @2)) + if (!add_increment(ctx, $1, true, true, &@2)) { - destroy_instr_list($1); +- destroy_instr_list($1); ++ destroy_block($1); YYABORT; -@@ -5352,7 +5937,7 @@ postfix_expr: + } + $$ = $1; + } + | postfix_expr '.' any_identifier { - struct hlsl_ir_node *node = node_from_list($1); +- struct hlsl_ir_node *node = node_from_list($1); ++ struct hlsl_ir_node *node = node_from_block($1); - if (node->data_type->type == HLSL_CLASS_STRUCT) + if (node->data_type->class == HLSL_CLASS_STRUCT) { struct hlsl_type *type = node->data_type; const struct hlsl_struct_field *field; -@@ -5365,20 +5950,20 @@ postfix_expr: +@@ -5365,20 +6312,20 @@ postfix_expr: } field_idx = field - type->e.record.fields; @@ -11290,24 +17871,35 @@ index fd1eaf6ec95..0e07fe578e1 100644 YYABORT; } - list_add_tail($1, &swizzle->node.entry); -+ list_add_tail($1, &swizzle->entry); ++ hlsl_block_add_instr($1, swizzle); $$ = $1; } else -@@ -5391,10 +5976,10 @@ postfix_expr: +@@ -5389,17 +6336,17 @@ postfix_expr: + } + | postfix_expr '[' expr ']' { - struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); +- struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); ++ struct hlsl_ir_node *array = node_from_block($1), *index = node_from_block($3); - list_move_tail($1, $3); -+ list_move_head($1, $3); - vkd3d_free($3); +- vkd3d_free($3); ++ hlsl_block_add_block($3, $1); ++ destroy_block($1); - if (!add_array_load(ctx, $1, array, index, &@2)) -+ if (!add_array_access(ctx, $1, array, index, &@2)) ++ if (!add_array_access(ctx, $3, array, index, &@2)) { - destroy_instr_list($1); +- destroy_instr_list($1); ++ 
destroy_block($3); YYABORT; -@@ -5412,7 +5997,7 @@ postfix_expr: + } +- $$ = $1; ++ $$ = $3; + } + + /* var_modifiers is necessary to avoid shift/reduce conflicts. */ +@@ -5412,7 +6359,7 @@ postfix_expr: free_parse_initializer(&$4); YYABORT; } @@ -11316,7 +17908,7 @@ index fd1eaf6ec95..0e07fe578e1 100644 { struct vkd3d_string_buffer *string; -@@ -5432,7 +6017,7 @@ postfix_expr: +@@ -5432,7 +6379,7 @@ postfix_expr: YYABORT; } @@ -11325,40 +17917,103 @@ index fd1eaf6ec95..0e07fe578e1 100644 { free_parse_initializer(&$4); YYABORT; -@@ -5459,7 +6044,7 @@ unary_expr: +@@ -5440,14 +6387,14 @@ postfix_expr: + } + | postfix_expr '.' any_identifier '(' func_arguments ')' + { +- struct hlsl_ir_node *object = node_from_list($1); ++ struct hlsl_ir_node *object = node_from_block($1); + +- list_move_tail($1, $5.instrs); ++ hlsl_block_add_block($1, $5.instrs); + vkd3d_free($5.instrs); + + if (!add_method_call(ctx, $1, object, $3, &$5, &@3)) + { +- hlsl_free_instr_list($1); ++ destroy_block($1); + vkd3d_free($5.args); + YYABORT; + } +@@ -5459,18 +6406,18 @@ unary_expr: postfix_expr | OP_INC unary_expr { - if (!add_increment(ctx, $2, false, false, @1)) + if (!add_increment(ctx, $2, false, false, &@1)) { - destroy_instr_list($2); +- destroy_instr_list($2); ++ destroy_block($2); YYABORT; -@@ -5468,7 +6053,7 @@ unary_expr: + } + $$ = $2; } | OP_DEC unary_expr { - if (!add_increment(ctx, $2, true, false, @1)) + if (!add_increment(ctx, $2, true, false, &@1)) { - destroy_instr_list($2); +- destroy_instr_list($2); ++ destroy_block($2); YYABORT; -@@ -5545,31 +6130,31 @@ mul_expr: + } + $$ = $2; +@@ -5481,23 +6428,23 @@ unary_expr: + } + | '-' unary_expr + { +- add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_list($2), &@1); ++ add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_block($2), &@1); + $$ = $2; + } + | '~' unary_expr + { +- add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_list($2), &@1); ++ add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, 
node_from_block($2), &@1); + $$ = $2; + } + | '!' unary_expr + { +- add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_list($2), &@1); ++ add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_block($2), &@1); + $$ = $2; + } + /* var_modifiers is necessary to avoid shift/reduce conflicts. */ + | '(' var_modifiers type arrays ')' unary_expr + { +- struct hlsl_type *src_type = node_from_list($6)->data_type; ++ struct hlsl_type *src_type = node_from_block($6)->data_type; + struct hlsl_type *dst_type; + unsigned int i; + +@@ -5533,9 +6480,9 @@ unary_expr: + YYABORT; + } + +- if (!add_cast(ctx, $6, node_from_list($6), dst_type, &@3)) ++ if (!add_cast(ctx, $6, node_from_block($6), dst_type, &@3)) + { +- hlsl_free_instr_list($6); ++ destroy_block($6); + YYABORT; + } + $$ = $6; +@@ -5545,114 +6492,138 @@ mul_expr: unary_expr | mul_expr '*' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, @2); -+ $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); } | mul_expr '/' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, @2); -+ $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); } | mul_expr '%' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, @2); -+ $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); } add_expr: @@ -11366,42 +18021,54 @@ index fd1eaf6ec95..0e07fe578e1 100644 | add_expr '+' mul_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); -+ $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); } | add_expr '-' mul_expr { struct hlsl_ir_node *neg; - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), 
@2))) -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), &@2))) ++ if (!(neg = add_unary_arithmetic_expr(ctx, $3, HLSL_OP1_NEG, node_from_block($3), &@2))) YYABORT; - list_add_tail($3, &neg->entry); +- list_add_tail($3, &neg->entry); - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); -+ $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); } shift_expr: -@@ -5587,30 +6172,30 @@ relational_expr: + add_expr + | shift_expr OP_LEFTSHIFT add_expr + { +- $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); + } + | shift_expr OP_RIGHTSHIFT add_expr + { +- $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); + } + + relational_expr: shift_expr | relational_expr '<' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, @2); -+ $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); } | relational_expr '>' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, @2); -+ $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); ++ $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); } | relational_expr OP_LE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, @2); -+ $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); ++ $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); } | relational_expr OP_GE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, @2); -+ $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); } equality_expr: @@ -11409,27 +18076,69 @@ index 
fd1eaf6ec95..0e07fe578e1 100644 | equality_expr OP_EQ relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, @2); -+ $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); } | equality_expr OP_NE relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, @2); -+ $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); } bitand_expr: -@@ -5652,7 +6237,26 @@ conditional_expr: + equality_expr + | bitand_expr '&' equality_expr + { +- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); + } + + bitxor_expr: + bitand_expr + | bitxor_expr '^' bitand_expr + { +- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); + } + + bitor_expr: + bitxor_expr + | bitor_expr '|' bitxor_expr + { +- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); + } + + logicand_expr: + bitor_expr + | logicand_expr OP_AND bitor_expr + { +- $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); + } + + logicor_expr: + logicand_expr + | logicor_expr OP_OR logicand_expr + { +- $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); ++ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); + } + + conditional_expr: logicor_expr | logicor_expr '?' 
expr ':' assignment_expr { - hlsl_fixme(ctx, &@$, "Ternary operator."); -+ struct hlsl_ir_node *cond = node_from_list($1), *first = node_from_list($3), *second = node_from_list($5); ++ struct hlsl_ir_node *cond = node_from_block($1); ++ struct hlsl_ir_node *first = node_from_block($3); ++ struct hlsl_ir_node *second = node_from_block($5); ++ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = { 0 }; + struct hlsl_type *common_type; + -+ list_move_tail($1, $3); -+ list_move_tail($1, $5); -+ vkd3d_free($3); -+ vkd3d_free($5); ++ hlsl_block_add_block($1, $3); ++ hlsl_block_add_block($1, $5); ++ destroy_block($3); ++ destroy_block($5); + + if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) + YYABORT; @@ -11440,14 +18149,45 @@ index fd1eaf6ec95..0e07fe578e1 100644 + if (!(second = add_implicit_conversion(ctx, $1, second, common_type, &@5))) + YYABORT; + -+ if (!hlsl_add_conditional(ctx, $1, cond, first, second)) ++ args[0] = cond; ++ args[1] = first; ++ args[2] = second; ++ if (!add_expr(ctx, $1, HLSL_OP3_TERNARY, args, common_type, &@1)) + YYABORT; + $$ = $1; } assignment_expr: +@@ -5660,15 +6631,15 @@ assignment_expr: + conditional_expr + | unary_expr assign_op assignment_expr + { +- struct hlsl_ir_node *lhs = node_from_list($1), *rhs = node_from_list($3); ++ struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3); + + if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression."); + YYABORT; + } +- list_move_tail($3, $1); +- vkd3d_free($1); ++ hlsl_block_add_block($3, $1); ++ destroy_block($1); + if (!add_assignment(ctx, $3, lhs, $2, rhs)) + YYABORT; + $$ = $3; +@@ -5725,6 +6696,6 @@ expr: + | expr ',' assignment_expr + { + $$ = $1; +- list_move_tail($$, $3); +- vkd3d_free($3); ++ hlsl_block_add_block($$, $3); ++ destroy_block($3); + } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c 
-index ab59875738c..765b1907426 100644 +index ab59875738c..be024842164 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -27,11 +27,11 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str @@ -11521,7 +18261,12 @@ index ab59875738c..765b1907426 100644 } return idx_offset; -@@ -101,7 +101,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st +@@ -97,11 +97,12 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str + static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *deref, const struct vkd3d_shader_location *loc) + { ++ enum hlsl_regset regset = hlsl_type_get_regset(deref->data_type); + struct hlsl_ir_node *offset = NULL; struct hlsl_type *type; unsigned int i; @@ -11530,8 +18275,12 @@ index ab59875738c..765b1907426 100644 assert(deref->var); type = deref->var->data_type; -@@ -114,7 +114,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st - deref->offset_regset, loc))) +@@ -111,10 +112,10 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st + struct hlsl_block idx_block; + + if (!(offset = new_offset_from_path_index(ctx, &idx_block, type, offset, deref->path[i].node, +- deref->offset_regset, loc))) ++ regset, loc))) return NULL; - list_move_tail(&block->instrs, &idx_block.instrs); @@ -11539,7 +18288,7 @@ index ab59875738c..765b1907426 100644 type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); } -@@ -123,15 +123,14 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st +@@ -123,15 +124,14 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st } /* TODO: remove when no longer needed, only used for transform_deref_paths_into_offsets() */ @@ -11547,7 +18296,8 @@ index ab59875738c..765b1907426 
100644 +static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { - const struct hlsl_type *type; +- const struct hlsl_type *type; ++ struct hlsl_type *type; struct hlsl_ir_node *offset; struct hlsl_block block; @@ -11557,7 +18307,7 @@ index ab59875738c..765b1907426 100644 /* register offsets shouldn't be used before this point is reached. */ assert(!deref->offset.node); -@@ -140,48 +139,22 @@ static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der +@@ -140,65 +140,39 @@ static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der /* Instructions that directly refer to structs or arrays (instead of single-register components) * are removed later by dce. So it is not a problem to just cleanup their derefs. */ @@ -11569,7 +18319,8 @@ index ab59875738c..765b1907426 100644 + return true; } - deref->offset_regset = hlsl_type_get_regset(type); +- deref->offset_regset = hlsl_type_get_regset(type); ++ deref->data_type = type; if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) - return; @@ -11592,7 +18343,7 @@ index ab59875738c..765b1907426 100644 - case HLSL_IR_STORE: - replace_deref_path_with_offset(ctx, &hlsl_ir_store(instr)->lhs, instr); - return true; - +- - case HLSL_IR_RESOURCE_LOAD: - replace_deref_path_with_offset(ctx, &hlsl_ir_resource_load(instr)->resource, instr); - replace_deref_path_with_offset(ctx, &hlsl_ir_resource_load(instr)->sampler, instr); @@ -11601,7 +18352,7 @@ index ab59875738c..765b1907426 100644 - case HLSL_IR_RESOURCE_STORE: - replace_deref_path_with_offset(ctx, &hlsl_ir_resource_store(instr)->resource, instr); - return true; -- + - default: - return false; - } @@ -11610,13 +18361,17 @@ index ab59875738c..765b1907426 100644 } /* Split uniforms into two variables representing the constant and temp -@@ -191,14 +164,14 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru + * registers, 
and copy the former to the latter, so that writes to uniforms + * work. */ +-static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *temp) ++static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *temp) { - struct vkd3d_string_buffer *name; +- struct vkd3d_string_buffer *name; struct hlsl_ir_var *uniform; - struct hlsl_ir_store *store; + struct hlsl_ir_node *store; struct hlsl_ir_load *load; ++ char *new_name; /* Use the synthetic name for the temp, rather than the uniform, so that we * can write the uniform name into the shader reflection data. */ @@ -11627,14 +18382,23 @@ index ab59875738c..765b1907426 100644 return; list_add_before(&temp->scope_entry, &uniform->scope_entry); list_add_tail(&ctx->extern_vars, &uniform->extern_entry); -@@ -212,17 +185,53 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru - temp->name = hlsl_strdup(ctx, name->buffer); - hlsl_release_string_buffer(ctx, name); +@@ -206,45 +180,111 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru + uniform->is_param = temp->is_param; + uniform->buffer = temp->buffer; + +- if (!(name = hlsl_get_string_buffer(ctx))) ++ if (!(new_name = hlsl_sprintf_alloc(ctx, "", temp->name))) + return; +- vkd3d_string_buffer_printf(name, "", temp->name); +- temp->name = hlsl_strdup(ctx, name->buffer); +- hlsl_release_string_buffer(ctx, name); ++ temp->name = new_name; - if (!(load = hlsl_new_var_load(ctx, uniform, temp->loc))) + if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) return; - list_add_head(instrs, &load->node.entry); +- list_add_head(instrs, &load->node.entry); ++ list_add_head(&block->instrs, &load->node.entry); if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) return; @@ -11683,17 +18447,18 @@ index ab59875738c..765b1907426 100644 + uint32_t index, bool output, const struct vkd3d_shader_location *loc) { struct hlsl_semantic 
new_semantic; - struct vkd3d_string_buffer *name; -@@ -230,15 +239,50 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir +- struct vkd3d_string_buffer *name; + struct hlsl_ir_var *ext_var; ++ char *new_name; - if (!(name = hlsl_get_string_buffer(ctx))) +- if (!(name = hlsl_get_string_buffer(ctx))) ++ if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? "output" : "input", semantic->name, index))) return NULL; - vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? "output" : "input", semantic->name, semantic->index); -+ vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? "output" : "input", semantic->name, index); + + LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { -+ if (!ascii_strcasecmp(ext_var->name, name->buffer)) ++ if (!ascii_strcasecmp(ext_var->name, new_name)) + { + if (output) + { @@ -11720,30 +18485,39 @@ index ab59875738c..765b1907426 100644 + } + } + -+ hlsl_release_string_buffer(ctx, name); ++ vkd3d_free(new_name); + return ext_var; + } + } + if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) { - hlsl_release_string_buffer(ctx, name); +- hlsl_release_string_buffer(ctx, name); ++ vkd3d_free(new_name); return NULL; } - new_semantic.index = semantic->index; - if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), - type, var->loc, &new_semantic, modifiers, NULL))) + new_semantic.index = index; -+ if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), type, loc, &new_semantic, -+ modifiers, NULL))) ++ if (!(ext_var = hlsl_new_var(ctx, new_name, type, loc, &new_semantic, modifiers, NULL))) { - hlsl_release_string_buffer(ctx, name); +- hlsl_release_string_buffer(ctx, name); ++ vkd3d_free(new_name); hlsl_cleanup_semantic(&new_semantic); -@@ -257,80 +301,116 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + return NULL; + } +- hlsl_release_string_buffer(ctx, name); + if (output) + ext_var->is_output_semantic 
= 1; + else +@@ -256,132 +296,175 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + return ext_var; } - static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, +-static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, - unsigned int modifiers, const struct hlsl_semantic *semantic) ++static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { - struct hlsl_type *type = lhs->node.data_type, *vector_type; @@ -11765,9 +18539,10 @@ index ab59875738c..765b1907426 100644 + if (!semantic->name) + return; + -+ vector_type_src = hlsl_get_vector_type(ctx, type->base_type, -+ (ctx->profile->major_version < 4) ? 4 : hlsl_type_minor_size(type)); + vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); ++ vector_type_src = vector_type_dst; ++ if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) ++ vector_type_src = hlsl_get_vector_type(ctx, type->base_type, 4); for (i = 0; i < hlsl_type_major_size(type); ++i) { @@ -11822,7 +18597,7 @@ index ab59875738c..765b1907426 100644 } -static void prepend_input_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs) -+static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, ++static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { + struct vkd3d_shader_location *loc = &lhs->node.loc; @@ -11883,33 +18658,39 @@ index ab59875738c..765b1907426 100644 + return; + list_add_after(&c->entry, &element_load->node.entry); + -+ prepend_input_copy_recurse(ctx, instrs, element_load, modifiers, semantic, 
elem_semantic_index); ++ prepend_input_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { -+ prepend_input_copy(ctx, instrs, lhs, modifiers, semantic, semantic_index); ++ prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); } } -@@ -341,45 +421,51 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, st + /* Split inputs into two variables representing the semantic and temp registers, + * and copy the former to the latter, so that writes to input variables work. */ +-static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) ++static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) + { struct hlsl_ir_load *load; /* This redundant load is expected to be deleted later by DCE. */ - if (!(load = hlsl_new_var_load(ctx, var, var->loc))) + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; - list_add_head(instrs, &load->node.entry); +- list_add_head(instrs, &load->node.entry); ++ list_add_head(&block->instrs, &load->node.entry); - if (var->data_type->type == HLSL_CLASS_STRUCT) - prepend_input_struct_copy(ctx, instrs, load); - else if (var->semantic.name) - prepend_input_copy(ctx, instrs, load, var->storage_modifiers, &var->semantic); -+ prepend_input_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); } - static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, +-static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, - unsigned int modifiers, const struct hlsl_semantic *semantic) ++static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, + unsigned int modifiers, struct hlsl_semantic 
*semantic, uint32_t semantic_index) { struct hlsl_type *type = rhs->node.data_type, *vector_type; @@ -11952,24 +18733,33 @@ index ab59875738c..765b1907426 100644 if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_tail(instrs, &c->node.entry); -+ list_add_tail(instrs, &c->entry); ++ hlsl_block_add_instr(block, c); - if (!(load = hlsl_new_load_index(ctx, &rhs->src, &c->node, &var->loc))) + if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) return; - list_add_tail(instrs, &load->node.entry); +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(block, &load->node); + } + else + { +@@ -389,65 +472,81 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct + + if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) + return; +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(block, &load->node); } -@@ -394,38 +480,57 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) return; - list_add_tail(instrs, &store->node.entry); -+ list_add_tail(instrs, &store->entry); ++ hlsl_block_add_instr(block, store); } } -static void append_output_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs) -+static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, ++static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { + struct vkd3d_shader_location *loc = &rhs->node.loc; @@ -12016,7 +18806,7 @@ index ab59875738c..765b1907426 100644 - list_add_tail(instrs, &field_load->node.entry); + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; -+ list_add_tail(instrs, &c->entry); ++ hlsl_block_add_instr(block, c); - if (field->type->type == HLSL_CLASS_STRUCT) - 
append_output_struct_copy(ctx, instrs, field_load); @@ -12027,31 +18817,37 @@ index ab59875738c..765b1907426 100644 - "Field '%s' is missing a semantic.", field->name); + if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) + return; -+ list_add_tail(instrs, &element_load->node.entry); ++ hlsl_block_add_instr(block, &element_load->node); + -+ append_output_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); ++ append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { -+ append_output_copy(ctx, instrs, rhs, modifiers, semantic, semantic_index); ++ append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); } } -@@ -437,17 +542,14 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, st + /* Split outputs into two variables representing the temp and semantic + * registers, and copy the former to the latter, so that reads from output + * variables work. */ +-static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) ++static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) + { struct hlsl_ir_load *load; /* This redundant load is expected to be deleted later by DCE. 
*/ - if (!(load = hlsl_new_var_load(ctx, var, var->loc))) + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; - list_add_tail(instrs, &load->node.entry); +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(block, &load->node); - if (var->data_type->type == HLSL_CLASS_STRUCT) - append_output_struct_copy(ctx, instrs, load); - else if (var->semantic.name) - append_output_copy(ctx, instrs, load, var->storage_modifiers, &var->semantic); -+ append_output_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); } -static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), @@ -12059,7 +18855,7 @@ index ab59875738c..765b1907426 100644 struct hlsl_block *block, void *context) { struct hlsl_ir_node *instr, *next; -@@ -459,11 +561,11 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx +@@ -459,11 +558,11 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx { struct hlsl_ir_if *iff = hlsl_ir_if(instr); @@ -12074,10 +18870,41 @@ index ab59875738c..765b1907426 100644 progress |= func(ctx, instr, context); } -@@ -471,6 +573,44 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx +@@ -471,6 +570,75 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx return progress; } ++typedef bool (*PFN_lower_func)(struct hlsl_ctx *, struct hlsl_ir_node *, struct hlsl_block *); ++ ++static bool call_lower_func(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ PFN_lower_func func = context; ++ struct hlsl_block block; ++ ++ hlsl_block_init(&block); ++ if (func(ctx, instr, &block)) ++ { ++ struct hlsl_ir_node *replacement = LIST_ENTRY(list_tail(&block.instrs), struct hlsl_ir_node, entry); ++ ++ list_move_before(&instr->entry, 
&block.instrs); ++ hlsl_replace_node(instr, replacement); ++ return true; ++ } ++ else ++ { ++ hlsl_block_cleanup(&block); ++ return false; ++ } ++} ++ ++/* Specific form of transform_ir() for passes which convert a single instruction ++ * to a block of one or more instructions. This helper takes care of setting up ++ * the block and calling hlsl_replace_node_with_block(). */ ++static bool lower_ir(struct hlsl_ctx *ctx, PFN_lower_func func, struct hlsl_block *block) ++{ ++ return hlsl_transform_ir(ctx, call_lower_func, block, func); ++} ++ +static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + bool res; @@ -12119,7 +18946,7 @@ index ab59875738c..765b1907426 100644 struct recursive_call_ctx { const struct hlsl_ir_function_decl **backtrace; -@@ -506,7 +646,7 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst +@@ -506,7 +674,7 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst return false; call_ctx->backtrace[call_ctx->count++] = decl; @@ -12128,7 +18955,7 @@ index ab59875738c..765b1907426 100644 --call_ctx->count; -@@ -516,21 +656,23 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst +@@ -516,21 +684,23 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst static void insert_early_return_break(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_node *cf_instr) { @@ -12146,7 +18973,7 @@ index ab59875738c..765b1907426 100644 list_add_after(&cf_instr->entry, &load->node.entry); - if (!(iff = hlsl_new_if(ctx, &load->node, cf_instr->loc))) -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc))) return; - list_add_after(&load->node.entry, &iff->node.entry); + hlsl_block_add_instr(&then_block, jump); @@ -12159,7 +18986,7 @@ index ab59875738c..765b1907426 100644 } /* Remove 
HLSL_IR_JUMP_RETURN calls by altering subsequent control flow. */ -@@ -566,7 +708,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun +@@ -566,7 +736,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun * the CF instruction, shove it into an if block, and then lower that if * block. * @@ -12168,7 +18995,7 @@ index ab59875738c..765b1907426 100644 * and run this pass multiple times, but we already know the only block * that still needs to be addressed, so there's not much point.) * -@@ -591,8 +733,8 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun +@@ -591,8 +761,8 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun { struct hlsl_ir_if *iff = hlsl_ir_if(instr); @@ -12179,7 +19006,7 @@ index ab59875738c..765b1907426 100644 if (has_early_return) { -@@ -628,18 +770,17 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun +@@ -628,18 +798,17 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun else if (instr->type == HLSL_IR_JUMP) { struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); @@ -12202,7 +19029,7 @@ index ab59875738c..765b1907426 100644 has_early_return = true; if (in_loop) -@@ -675,9 +816,9 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun +@@ -675,9 +844,9 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun else if (cf_instr) { struct list *tail = list_tail(&block->instrs); @@ -12214,7 +19041,7 @@ index ab59875738c..765b1907426 100644 /* If we're in a loop, we should have used "break" instead. 
*/ assert(!in_loop); -@@ -685,21 +826,21 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun +@@ -685,21 +854,21 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun if (tail == &cf_instr->entry) return has_early_return; @@ -12246,7 +19073,7 @@ index ab59875738c..765b1907426 100644 } return has_early_return; -@@ -721,7 +862,6 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * +@@ -721,7 +890,6 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * hlsl_error(ctx, &call->node.loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Function \"%s\" is not defined.", decl->func->name); @@ -12254,7 +19081,7 @@ index ab59875738c..765b1907426 100644 if (!hlsl_clone_block(ctx, &block, &decl->body)) return false; list_move_before(&call->node.entry, &block.instrs); -@@ -731,6 +871,142 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * +@@ -731,6 +899,191 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * return true; } @@ -12293,6 +19120,55 @@ index ab59875738c..765b1907426 100644 + return &coords_load->node; +} + ++/* hlsl_ir_swizzle nodes that directly point to a matrix value are only a parse-time construct that ++ * represents matrix swizzles (e.g. mat._m01_m23) before we know if they will be used in the lhs of ++ * an assignment or as a value made from different components of the matrix. The former cases should ++ * have already been split into several separate assignments, but the latter are lowered by this ++ * pass. 
*/ ++static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++{ ++ struct hlsl_ir_swizzle *swizzle; ++ struct hlsl_ir_load *var_load; ++ struct hlsl_deref var_deref; ++ struct hlsl_type *matrix_type; ++ struct hlsl_ir_var *var; ++ unsigned int x, y, k, i; ++ ++ if (instr->type != HLSL_IR_SWIZZLE) ++ return false; ++ swizzle = hlsl_ir_swizzle(instr); ++ matrix_type = swizzle->val.node->data_type; ++ if (matrix_type->class != HLSL_CLASS_MATRIX) ++ return false; ++ ++ if (!(var = hlsl_new_synthetic_var(ctx, "matrix-swizzle", instr->data_type, &instr->loc))) ++ return false; ++ hlsl_init_simple_deref_from_var(&var_deref, var); ++ ++ for (i = 0; i < instr->data_type->dimx; ++i) ++ { ++ struct hlsl_block store_block; ++ struct hlsl_ir_node *load; ++ ++ y = (swizzle->swizzle >> (8 * i + 4)) & 0xf; ++ x = (swizzle->swizzle >> 8 * i) & 0xf; ++ k = y * matrix_type->dimx + x; ++ ++ if (!(load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc))) ++ return false; ++ ++ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, load)) ++ return false; ++ hlsl_block_add_block(block, &store_block); ++ } ++ ++ if (!(var_load = hlsl_new_var_load(ctx, var, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, &var_load->node); ++ ++ return true; ++} ++ +/* hlsl_ir_index nodes are a parse-time construct used to represent array indexing and struct + * record access before knowing if they will be used in the lhs of an assignment --in which case + * they are lowered into a deref-- or as the load of an element within a larger value. @@ -12397,7 +19273,7 @@ index ab59875738c..765b1907426 100644 /* Lower casts from vec1 to vecN to swizzles. 
*/ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { -@@ -746,26 +1022,24 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v +@@ -746,26 +1099,24 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type; @@ -12430,7 +19306,7 @@ index ab59875738c..765b1907426 100644 } hlsl_replace_node(&cast->node, replacement); -@@ -949,9 +1223,9 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ +@@ -949,9 +1300,9 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ path_node = deref->path[depth].node; subtype = hlsl_get_element_type_from_path_index(ctx, type, path_node); @@ -12442,7 +19318,7 @@ index ab59875738c..765b1907426 100644 for (i = 0; i < idx; ++i) comp_start += hlsl_type_component_count(type->e.record.fields[i].type); -@@ -966,7 +1240,7 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ +@@ -966,7 +1317,7 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ if (path_node->type == HLSL_IR_CONSTANT) { copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, @@ -12451,7 +19327,7 @@ index ab59875738c..765b1907426 100644 } else { -@@ -1041,14 +1315,14 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, +@@ -1041,14 +1392,14 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); @@ -12470,7 +19346,7 @@ index ab59875738c..765b1907426 100644 } hlsl_replace_node(instr, new_instr); -@@ -1061,9 +1335,9 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, +@@ -1061,9 +1412,9 @@ static bool 
copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, { const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type); const struct hlsl_ir_var *var = deref->var; @@ -12482,7 +19358,7 @@ index ab59875738c..765b1907426 100644 if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count)) return false; -@@ -1076,21 +1350,17 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, +@@ -1076,21 +1427,17 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, || value->node->type != HLSL_IR_CONSTANT) return false; @@ -12508,7 +19384,7 @@ index ab59875738c..765b1907426 100644 return true; } -@@ -1099,7 +1369,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, +@@ -1099,7 +1446,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, { struct hlsl_type *type = load->node.data_type; @@ -12517,7 +19393,7 @@ index ab59875738c..765b1907426 100644 { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: -@@ -1220,7 +1490,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s +@@ -1220,7 +1567,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s { unsigned int writemask = store->writemask; @@ -12526,7 +19402,7 @@ index ab59875738c..765b1907426 100644 writemask = VKD3DSP_WRITEMASK_0; copy_propagation_set_value(var_def, start, writemask, store->rhs.node); } -@@ -1270,8 +1540,8 @@ static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct +@@ -1270,8 +1617,8 @@ static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct { struct hlsl_ir_if *iff = hlsl_ir_if(instr); @@ -12537,7 +19413,7 @@ index ab59875738c..765b1907426 100644 break; } -@@ -1301,19 +1571,19 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if +@@ -1301,19 +1648,19 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if bool 
progress = false; copy_propagation_state_init(ctx, &inner_state, state); @@ -12561,7 +19437,7 @@ index ab59875738c..765b1907426 100644 return progress; } -@@ -1379,7 +1649,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b +@@ -1379,7 +1726,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b return progress; } @@ -12570,7 +19446,34 @@ index ab59875738c..765b1907426 100644 { struct copy_propagation_state state; bool progress; -@@ -1471,7 +1741,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ +@@ -1419,7 +1766,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ + { + struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + +- if (!(load->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ if (!load->resource.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Loaded resource must have a single uniform source."); +@@ -1434,7 +1781,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ + + if (load->sampler.var) + { +- if (!(load->sampler.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ if (!load->sampler.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Resource load sampler must have a single uniform source."); +@@ -1452,7 +1799,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ + { + struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); + +- if (!(store->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ if (!store->resource.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Accessed resource must have a single uniform source."); +@@ -1471,7 +1818,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ static bool is_vec1(const struct 
hlsl_type *type) { @@ -12579,7 +19482,7 @@ index ab59875738c..765b1907426 100644 } static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -@@ -1505,21 +1775,20 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst +@@ -1505,21 +1852,20 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst static bool split_copy(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, const struct hlsl_ir_load *load, const unsigned int idx, struct hlsl_type *type) { @@ -12606,7 +19509,7 @@ index ab59875738c..765b1907426 100644 return true; } -@@ -1538,7 +1807,7 @@ static bool split_array_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -1538,7 +1884,7 @@ static bool split_array_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, store = hlsl_ir_store(instr); rhs = store->rhs.node; type = rhs->data_type; @@ -12615,7 +19518,7 @@ index ab59875738c..765b1907426 100644 return false; element_type = type->e.array.type; -@@ -1575,7 +1844,7 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -1575,7 +1921,7 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr store = hlsl_ir_store(instr); rhs = store->rhs.node; type = rhs->data_type; @@ -12624,7 +19527,7 @@ index ab59875738c..765b1907426 100644 return false; if (rhs->type != HLSL_IR_LOAD) -@@ -1614,7 +1883,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -1614,13 +1960,13 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr store = hlsl_ir_store(instr); rhs = store->rhs.node; type = rhs->data_type; @@ -12633,7 +19536,14 @@ index ab59875738c..765b1907426 100644 return false; element_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); -@@ -1649,22 +1918,21 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins + if (rhs->type != 
HLSL_IR_LOAD) + { +- hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type.\n"); ++ hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type."); + return false; + } + +@@ -1649,22 +1995,21 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type; @@ -12662,7 +19572,7 @@ index ab59875738c..765b1907426 100644 return true; } -@@ -1684,8 +1952,7 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -1684,8 +2029,7 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr if (next_instr->type == HLSL_IR_SWIZZLE) { @@ -12672,7 +19582,7 @@ index ab59875738c..765b1907426 100644 unsigned int combined_swizzle; combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle, -@@ -1695,9 +1962,8 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -1695,9 +2039,8 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) return false; @@ -12684,7 +19594,7 @@ index ab59875738c..765b1907426 100644 return true; } -@@ -1725,6 +1991,81 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i +@@ -1725,6 +2068,212 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return true; } @@ -12762,11 +19672,142 @@ index ab59875738c..765b1907426 100644 + + return false; +} ++ ++/* Lower combined samples and sampler variables to synthesized separated textures and samplers. ++ * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. 
*/ ++static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_resource_load *load; ++ struct vkd3d_string_buffer *name; ++ struct hlsl_ir_var *var; ++ unsigned int i; ++ ++ if (instr->type != HLSL_IR_RESOURCE_LOAD) ++ return false; ++ load = hlsl_ir_resource_load(instr); ++ ++ switch (load->load_type) ++ { ++ case HLSL_RESOURCE_LOAD: ++ case HLSL_RESOURCE_GATHER_RED: ++ case HLSL_RESOURCE_GATHER_GREEN: ++ case HLSL_RESOURCE_GATHER_BLUE: ++ case HLSL_RESOURCE_GATHER_ALPHA: ++ case HLSL_RESOURCE_RESINFO: ++ case HLSL_RESOURCE_SAMPLE_CMP: ++ case HLSL_RESOURCE_SAMPLE_CMP_LZ: ++ case HLSL_RESOURCE_SAMPLE_GRAD: ++ case HLSL_RESOURCE_SAMPLE_INFO: ++ return false; ++ ++ case HLSL_RESOURCE_SAMPLE: ++ case HLSL_RESOURCE_SAMPLE_LOD: ++ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: ++ break; ++ } ++ if (load->sampler.var) ++ return false; ++ ++ if (!hlsl_type_is_resource(load->resource.var->data_type)) ++ { ++ hlsl_fixme(ctx, &instr->loc, "Lower combined samplers within structs."); ++ return false; ++ } ++ ++ assert(hlsl_type_get_regset(load->resource.var->data_type) == HLSL_REGSET_SAMPLERS); ++ ++ if (!(name = hlsl_get_string_buffer(ctx))) ++ return false; ++ vkd3d_string_buffer_printf(name, "%s", load->resource.var->name); ++ ++ TRACE("Lowering to separate resource %s.\n", debugstr_a(name->buffer)); ++ ++ if (!(var = hlsl_get_var(ctx->globals, name->buffer))) ++ { ++ struct hlsl_type *texture_array_type = hlsl_new_texture_type(ctx, load->sampling_dim, ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); ++ ++ /* Create (possibly multi-dimensional) texture array type with the same dims as the sampler array. 
*/ ++ struct hlsl_type *arr_type = load->resource.var->data_type; ++ for (i = 0; i < load->resource.path_len; ++i) ++ { ++ assert(arr_type->class == HLSL_CLASS_ARRAY); ++ texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); ++ arr_type = arr_type->e.array.type; ++ } ++ ++ if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, texture_array_type, &instr->loc, false))) ++ { ++ hlsl_release_string_buffer(ctx, name); ++ return false; ++ } ++ var->is_uniform = 1; ++ var->is_separated_resource = true; ++ ++ list_add_tail(&ctx->extern_vars, &var->extern_entry); ++ } ++ hlsl_release_string_buffer(ctx, name); ++ ++ if (load->sampling_dim != var->data_type->sampler_dim) ++ { ++ hlsl_error(ctx, &load->node.loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, ++ "Cannot split combined samplers from \"%s\" if they have different usage dimensions.", ++ load->resource.var->name); ++ hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, "First use as combined sampler is here."); ++ return false; ++ ++ } ++ ++ hlsl_copy_deref(ctx, &load->sampler, &load->resource); ++ load->resource.var = var; ++ assert(hlsl_deref_get_type(ctx, &load->resource)->base_type == HLSL_TYPE_TEXTURE); ++ assert(hlsl_deref_get_type(ctx, &load->sampler)->base_type == HLSL_TYPE_SAMPLER); ++ ++ return true; ++} ++ ++static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl_ir_var *to_add, ++ enum hlsl_regset regset) ++{ ++ struct hlsl_ir_var *var; ++ ++ LIST_FOR_EACH_ENTRY(var, list, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->bind_count[regset] < to_add->bind_count[regset]) ++ { ++ list_add_before(&var->extern_entry, &to_add->extern_entry); ++ return; ++ } ++ } ++ ++ list_add_tail(list, &to_add->extern_entry); ++} ++ ++static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) ++{ ++ struct list separated_resources; ++ struct hlsl_ir_var *var, *next; ++ ++ list_init(&separated_resources); ++ ++ 
LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_separated_resource) ++ { ++ list_remove(&var->extern_entry); ++ insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_TEXTURES); ++ } ++ } ++ ++ list_move_head(&ctx->extern_vars, &separated_resources); ++ ++ return false; ++} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { -@@ -1737,7 +2078,7 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi +@@ -1737,7 +2286,7 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi if (expr->op != HLSL_OP2_DIV) return false; @@ -12775,7 +19816,7 @@ index ab59875738c..765b1907426 100644 return false; list_add_before(&expr->node.entry, &rcp->entry); expr->op = HLSL_OP2_MUL; -@@ -1758,7 +2099,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c +@@ -1758,7 +2307,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c if (expr->op != HLSL_OP1_SQRT) return false; @@ -12784,7 +19825,7 @@ index ab59875738c..765b1907426 100644 return false; list_add_before(&expr->node.entry, &rsq->entry); expr->op = HLSL_OP1_RCP; -@@ -1770,9 +2111,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c +@@ -1770,9 +2319,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c /* Lower DP2 to MUL + ADD */ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -12795,7 +19836,7 @@ index ab59875738c..765b1907426 100644 struct hlsl_ir_expr *expr; if (instr->type != HLSL_IR_EXPR) -@@ -1791,11 +2130,11 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co +@@ -1791,11 +2338,11 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co if (!(zero = hlsl_new_float_constant(ctx, 0.0f, 
&expr->node.loc))) return false; @@ -12809,7 +19850,7 @@ index ab59875738c..765b1907426 100644 if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_DP2ADD, operands, instr->data_type, &expr->node.loc))) return false; -@@ -1808,13 +2147,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co +@@ -1808,13 +2355,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), instr->data_type->dimx, mul, &expr->node.loc))) return false; @@ -12826,7 +19867,7 @@ index ab59875738c..765b1907426 100644 return false; } list_add_before(&instr->entry, &replacement->entry); -@@ -1836,7 +2175,7 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co +@@ -1836,7 +2383,7 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co if (expr->op != HLSL_OP1_ABS) return false; @@ -12835,58 +19876,43 @@ index ab59875738c..765b1907426 100644 return false; list_add_before(&instr->entry, &neg->entry); -@@ -1848,77 +2187,124 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co +@@ -1848,10 +2395,104 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co return true; } --static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +/* Lower ROUND using FRC, ROUND(x) -> ((x + 0.5) - FRC(x + 0.5)). 
*/ +static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -- struct hlsl_type *type = instr->data_type, *arg_type; -- struct hlsl_ir_constant *zero; ++{ + struct hlsl_ir_node *arg, *neg, *sum, *frc, *half, *replacement; + struct hlsl_type *type = instr->data_type; + struct hlsl_constant_value half_value; + unsigned int i, component_count; - struct hlsl_ir_expr *expr; - - if (instr->type != HLSL_IR_EXPR) - return false; ++ struct hlsl_ir_expr *expr; + - expr = hlsl_ir_expr(instr); -- if (expr->op != HLSL_OP1_CAST) -- return false; -- arg_type = expr->operands[0].node->data_type; -- if (type->type > HLSL_CLASS_VECTOR || arg_type->type > HLSL_CLASS_VECTOR) ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ ++ expr = hlsl_ir_expr(instr); + arg = expr->operands[0].node; + if (expr->op != HLSL_OP1_ROUND) - return false; -- if (type->base_type != HLSL_TYPE_BOOL) ++ return false; + + component_count = hlsl_type_component_count(type); + for (i = 0; i < component_count; ++i) + half_value.u[i].f = 0.5f; + if (!(half = hlsl_new_constant(ctx, type, &half_value, &expr->node.loc))) - return false; - -- /* Narrowing casts should have already been lowered. 
*/ -- assert(type->dimx == arg_type->dimx); ++ return false; ++ + list_add_before(&instr->entry, &half->entry); - -- zero = hlsl_new_constant(ctx, arg_type, &instr->loc); -- if (!zero) ++ + if (!(sum = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, half))) - return false; -- list_add_before(&instr->entry, &zero->node.entry); ++ return false; + list_add_before(&instr->entry, &sum->entry); - -- expr->op = HLSL_OP2_NEQUAL; -- hlsl_src_from_node(&expr->operands[1], &zero->node); ++ + if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, sum, &instr->loc))) + return false; + list_add_before(&instr->entry, &frc->entry); - ++ + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, frc, &instr->loc))) + return false; + list_add_before(&instr->entry, &neg->entry); @@ -12896,43 +19922,95 @@ index ab59875738c..765b1907426 100644 + list_add_before(&instr->entry, &replacement->entry); + + hlsl_replace_node(instr, replacement); ++ return true; ++} ++ ++/* Use 'movc' for the ternary operator. */ ++static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], *replacement; ++ struct hlsl_ir_node *zero, *cond, *first, *second; ++ struct hlsl_constant_value zero_value = { 0 }; ++ struct hlsl_ir_expr *expr; ++ struct hlsl_type *type; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ ++ expr = hlsl_ir_expr(instr); ++ if (expr->op != HLSL_OP3_TERNARY) ++ return false; ++ ++ cond = expr->operands[0].node; ++ first = expr->operands[1].node; ++ second = expr->operands[2].node; ++ ++ if (cond->data_type->base_type == HLSL_TYPE_FLOAT) ++ { ++ if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc))) ++ return false; ++ list_add_tail(&instr->entry, &zero->entry); ++ ++ memset(operands, 0, sizeof(operands)); ++ operands[0] = zero; ++ operands[1] = cond; ++ type = cond->data_type; ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); ++ if (!(cond 
= hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &cond->entry); ++ } ++ ++ memset(operands, 0, sizeof(operands)); ++ operands[0] = cond; ++ operands[1] = first; ++ operands[2] = second; ++ if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &replacement->entry); ++ ++ hlsl_replace_node(instr, replacement); ++ return true; ++} ++ + static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + struct hlsl_type *type = instr->data_type, *arg_type; +- struct hlsl_ir_constant *zero; ++ static const struct hlsl_constant_value zero_value; ++ struct hlsl_ir_node *zero; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) +@@ -1860,7 +2501,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + if (expr->op != HLSL_OP1_CAST) + return false; + arg_type = expr->operands[0].node->data_type; +- if (type->type > HLSL_CLASS_VECTOR || arg_type->type > HLSL_CLASS_VECTOR) ++ if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR) + return false; + if (type->base_type != HLSL_TYPE_BOOL) + return false; +@@ -1868,57 +2509,58 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + /* Narrowing casts should have already been lowered. 
*/ + assert(type->dimx == arg_type->dimx); + +- zero = hlsl_new_constant(ctx, arg_type, &instr->loc); ++ zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); + if (!zero) + return false; +- list_add_before(&instr->entry, &zero->node.entry); ++ list_add_before(&instr->entry, &zero->entry); + + expr->op = HLSL_OP2_NEQUAL; +- hlsl_src_from_node(&expr->operands[1], &zero->node); ++ hlsl_src_from_node(&expr->operands[1], zero); + return true; } -struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, -+static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_type *type = instr->data_type, *arg_type; -+ static const struct hlsl_constant_value zero_value; -+ struct hlsl_ir_node *zero; -+ struct hlsl_ir_expr *expr; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP1_CAST) -+ return false; -+ arg_type = expr->operands[0].node->data_type; -+ if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR) -+ return false; -+ if (type->base_type != HLSL_TYPE_BOOL) -+ return false; -+ -+ /* Narrowing casts should have already been lowered. 
*/ -+ assert(type->dimx == arg_type->dimx); -+ -+ zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); -+ if (!zero) -+ return false; -+ list_add_before(&instr->entry, &zero->entry); -+ -+ expr->op = HLSL_OP2_NEQUAL; -+ hlsl_src_from_node(&expr->operands[1], zero); -+ -+ return true; -+} -+ -+struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, ++struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) { - struct hlsl_ir_store *store; @@ -12962,24 +20040,26 @@ index ab59875738c..765b1907426 100644 return NULL; - list_add_tail(&iff->else_instrs.instrs, &store->node.entry); + hlsl_block_add_instr(&else_block, store); - -- if (!(load = hlsl_new_var_load(ctx, var, condition->loc))) ++ + if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) + return NULL; -+ list_add_tail(instrs, &iff->entry); -+ ++ hlsl_block_add_instr(instrs, iff); + +- if (!(load = hlsl_new_var_load(ctx, var, condition->loc))) + if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) return NULL; - list_add_tail(instrs, &load->node.entry); +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(instrs, &load->node); - return load; + return &load->node; } - static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +-static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { - struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg; -+ struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; ++ struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; struct hlsl_type *type = 
instr->data_type, *utype; - struct hlsl_ir_expr *cast1, *cast2, *cast3; - struct hlsl_ir_constant *high_bit; @@ -12989,7 +20069,7 @@ index ab59875738c..765b1907426 100644 unsigned int i; if (instr->type != HLSL_IR_EXPR) -@@ -1928,69 +2314,67 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -1928,69 +2570,63 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_DIV) return false; @@ -13003,7 +20083,8 @@ index ab59875738c..765b1907426 100644 if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) return false; - list_add_before(&instr->entry, &xor->entry); +- list_add_before(&instr->entry, &xor->entry); ++ hlsl_block_add_instr(block, xor); - if (!(high_bit = hlsl_new_constant(ctx, type, &instr->loc))) - return false; @@ -13013,61 +20094,66 @@ index ab59875738c..765b1907426 100644 + high_bit_value.u[i].u = 0x80000000; + if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) + return false; -+ list_add_before(&instr->entry, &high_bit->entry); ++ hlsl_block_add_instr(block, high_bit); - if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, &high_bit->node))) + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, high_bit))) return false; - list_add_before(&instr->entry, &and->entry); +- list_add_before(&instr->entry, &and->entry); ++ hlsl_block_add_instr(block, and); - if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, instr->loc))) + if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) return false; - list_add_before(&instr->entry, &abs1->entry); +- list_add_before(&instr->entry, &abs1->entry); ++ hlsl_block_add_instr(block, abs1); if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast1->node.entry); -+ list_add_before(&instr->entry, &cast1->entry); ++ hlsl_block_add_instr(block, cast1); - if (!(abs2 = 
hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, instr->loc))) + if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) return false; - list_add_before(&instr->entry, &abs2->entry); +- list_add_before(&instr->entry, &abs2->entry); ++ hlsl_block_add_instr(block, abs2); if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast2->node.entry); -+ list_add_before(&instr->entry, &cast2->entry); ++ hlsl_block_add_instr(block, cast2); - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &cast1->node, &cast2->node))) + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, cast1, cast2))) return false; - list_add_before(&instr->entry, &div->entry); +- list_add_before(&instr->entry, &div->entry); ++ hlsl_block_add_instr(block, div); if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) return false; - list_add_before(&instr->entry, &cast3->node.entry); -+ list_add_before(&instr->entry, &cast3->entry); - +- - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, &cast3->node, instr->loc))) -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) - return false; - list_add_before(&instr->entry, &neg->entry); +- return false; +- list_add_before(&instr->entry, &neg->entry); ++ hlsl_block_add_instr(block, cast3); - if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, &cast3->node))) -+ if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) return false; - hlsl_replace_node(instr, &cond->node); -+ hlsl_replace_node(instr, cond); ++ hlsl_block_add_instr(block, neg); - return true; +- return true; ++ return hlsl_add_conditional(ctx, block, and, neg, cast3); } - static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +-static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++static bool lower_int_modulus(struct 
hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { - struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg; -+ struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; ++ struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; struct hlsl_type *type = instr->data_type, *utype; - struct hlsl_ir_expr *cast1, *cast2, *cast3; - struct hlsl_ir_constant *high_bit; @@ -13077,7 +20163,7 @@ index ab59875738c..765b1907426 100644 unsigned int i; if (instr->type != HLSL_IR_EXPR) -@@ -2000,53 +2384,53 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -2000,55 +2636,51 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_MOD) return false; @@ -13097,57 +20183,63 @@ index ab59875738c..765b1907426 100644 + high_bit_value.u[i].u = 0x80000000; + if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) + return false; -+ list_add_before(&instr->entry, &high_bit->entry); ++ hlsl_block_add_instr(block, high_bit); - if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, &high_bit->node))) + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, high_bit))) return false; - list_add_before(&instr->entry, &and->entry); +- list_add_before(&instr->entry, &and->entry); ++ hlsl_block_add_instr(block, and); - if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, instr->loc))) + if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) return false; - list_add_before(&instr->entry, &abs1->entry); +- list_add_before(&instr->entry, &abs1->entry); ++ hlsl_block_add_instr(block, abs1); if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast1->node.entry); -+ list_add_before(&instr->entry, &cast1->entry); ++ hlsl_block_add_instr(block, cast1); - 
if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, instr->loc))) + if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) return false; - list_add_before(&instr->entry, &abs2->entry); +- list_add_before(&instr->entry, &abs2->entry); ++ hlsl_block_add_instr(block, abs2); if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast2->node.entry); -+ list_add_before(&instr->entry, &cast2->entry); ++ hlsl_block_add_instr(block, cast2); - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, &cast1->node, &cast2->node))) + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, cast1, cast2))) return false; - list_add_before(&instr->entry, &div->entry); +- list_add_before(&instr->entry, &div->entry); ++ hlsl_block_add_instr(block, div); if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) return false; - list_add_before(&instr->entry, &cast3->node.entry); -+ list_add_before(&instr->entry, &cast3->entry); - +- - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, &cast3->node, instr->loc))) -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) - return false; - list_add_before(&instr->entry, &neg->entry); +- return false; +- list_add_before(&instr->entry, &neg->entry); ++ hlsl_block_add_instr(block, cast3); - if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, &cast3->node))) -+ if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) return false; - hlsl_replace_node(instr, &cond->node); -+ hlsl_replace_node(instr, cond); ++ hlsl_block_add_instr(block, neg); - return true; +- return true; ++ return hlsl_add_conditional(ctx, block, and, neg, cast3); } -@@ -2063,14 +2447,14 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void + + static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -2063,14 +2695,14 
@@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void if (expr->op != HLSL_OP1_ABS) return false; @@ -13164,12 +20256,14 @@ index ab59875738c..765b1907426 100644 return false; list_add_before(&instr->entry, &neg->entry); -@@ -2080,12 +2464,63 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void +@@ -2080,12 +2712,63 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void return true; } +-static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ + { +- struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc; + struct hlsl_ir_node *arg1, *arg2, *mult, *comps[4] = {0}, *res; + struct hlsl_type *type = instr->data_type; + struct hlsl_ir_expr *expr; @@ -13220,10 +20314,9 @@ index ab59875738c..765b1907426 100644 + return false; +} + - static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -- struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc; -+ struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one; ++static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one, *mul3; struct hlsl_type *type = instr->data_type, *btype; - struct hlsl_ir_constant *one; - struct hlsl_ir_load *cond; @@ -13231,7 +20324,7 @@ index ab59875738c..765b1907426 100644 struct hlsl_ir_expr *expr; unsigned int i; -@@ -2096,17 +2531,17 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr +@@ -2096,55 +2779,108 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_MOD) return false; @@ -13245,23 
+20338,29 @@ index ab59875738c..765b1907426 100644 if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) return false; - list_add_before(&instr->entry, &mul1->entry); +- list_add_before(&instr->entry, &mul1->entry); ++ hlsl_block_add_instr(block, mul1); - if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, instr->loc))) + if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, &instr->loc))) return false; - list_add_before(&instr->entry, &neg1->entry); +- list_add_before(&instr->entry, &neg1->entry); ++ hlsl_block_add_instr(block, neg1); -@@ -2115,20 +2550,20 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + if (!(ge = hlsl_new_binary_expr(ctx, HLSL_OP2_GEQUAL, mul1, neg1))) + return false; ge->data_type = btype; - list_add_before(&instr->entry, &ge->entry); +- list_add_before(&instr->entry, &ge->entry); ++ hlsl_block_add_instr(block, ge); - if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, instr->loc))) + if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, &instr->loc))) return false; - list_add_before(&instr->entry, &neg2->entry); +- list_add_before(&instr->entry, &neg2->entry); ++ hlsl_block_add_instr(block, neg2); - if (!(cond = hlsl_add_conditional(ctx, &instr->entry, ge, arg2, neg2))) +- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, ge, arg2, neg2))) ++ if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2))) return false; - if (!(one = hlsl_new_constant(ctx, type, &instr->loc))) @@ -13272,32 +20371,92 @@ index ab59875738c..765b1907426 100644 + one_value.u[i].f = 1.0f; + if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) + return false; -+ list_add_before(&instr->entry, &one->entry); ++ hlsl_block_add_instr(block, one); - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &one->node, &cond->node))) + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, one, cond))) return false; - list_add_before(&instr->entry, &div->entry); +- list_add_before(&instr->entry, 
&div->entry); ++ hlsl_block_add_instr(block, div); -@@ -2136,7 +2571,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, div, arg1))) return false; - list_add_before(&instr->entry, &mul2->entry); +- list_add_before(&instr->entry, &mul2->entry); ++ hlsl_block_add_instr(block, mul2); - if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, instr->loc))) + if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, &instr->loc))) return false; - list_add_before(&instr->entry, &frc->entry); +- list_add_before(&instr->entry, &frc->entry); ++ hlsl_block_add_instr(block, frc); -@@ -2144,7 +2579,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - hlsl_src_remove(&expr->operands[0]); - hlsl_src_remove(&expr->operands[1]); - hlsl_src_from_node(&expr->operands[0], frc); +- expr->op = HLSL_OP2_MUL; +- hlsl_src_remove(&expr->operands[0]); +- hlsl_src_remove(&expr->operands[1]); +- hlsl_src_from_node(&expr->operands[0], frc); - hlsl_src_from_node(&expr->operands[1], &cond->node); -+ hlsl_src_from_node(&expr->operands[1], cond); ++ if (!(mul3 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, frc, cond))) ++ return false; ++ hlsl_block_add_instr(block, mul3); ++ ++ return true; ++} ++ ++static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; ++ static const struct hlsl_constant_value zero_value; ++ struct hlsl_type *arg_type, *cmp_type; ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; ++ struct hlsl_ir_jump *jump; ++ struct hlsl_block block; ++ unsigned int i, count; ++ ++ if (instr->type != HLSL_IR_JUMP) ++ return false; ++ jump = hlsl_ir_jump(instr); ++ if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) ++ return false; ++ ++ hlsl_block_init(&block); ++ ++ arg_type = jump->condition.node->data_type; ++ if (!(zero = hlsl_new_constant(ctx, arg_type, 
&zero_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(&block, zero); ++ ++ operands[0] = jump->condition.node; ++ operands[1] = zero; ++ cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); ++ if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(&block, cmp); ++ ++ if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(&block, bool_false); ++ ++ or = bool_false; ++ ++ count = hlsl_type_component_count(cmp_type); ++ for (i = 0; i < count; ++i) ++ { ++ if (!(load = hlsl_add_load_component(ctx, &block, cmp, i, &instr->loc))) ++ return false; ++ ++ if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) ++ return NULL; ++ hlsl_block_add_instr(&block, or); ++ } ++ ++ list_move_tail(&instr->entry, &block.instrs); ++ hlsl_src_remove(&jump->condition); ++ hlsl_src_from_node(&jump->condition, or); ++ jump->type = HLSL_IR_JUMP_DISCARD_NZ; return true; } -@@ -2155,6 +2590,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -2155,6 +2891,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { case HLSL_IR_CONSTANT: case HLSL_IR_EXPR: @@ -13305,7 +20464,7 @@ index ab59875738c..765b1907426 100644 case HLSL_IR_LOAD: case HLSL_IR_RESOURCE_LOAD: case HLSL_IR_SWIZZLE: -@@ -2204,8 +2640,8 @@ static unsigned int index_instructions(struct hlsl_block *block, unsigned int in +@@ -2204,8 +2941,8 @@ static unsigned int index_instructions(struct hlsl_block *block, unsigned int in if (instr->type == HLSL_IR_IF) { struct hlsl_ir_if *iff = hlsl_ir_if(instr); @@ -13316,19 +20475,19 @@ index ab59875738c..765b1907426 100644 } else if (instr->type == HLSL_IR_LOOP) { -@@ -2262,9 +2698,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) +@@ -2262,9 +2999,9 @@ static void 
allocate_register_reservations(struct hlsl_ctx *ctx) continue; regset = hlsl_type_get_regset(var->data_type); - if (var->reg_reservation.type) -+ if (var->reg_reservation.reg_type && var->regs[regset].bind_count) ++ if (var->reg_reservation.reg_type && var->regs[regset].allocation_size) { - if (var->reg_reservation.type != get_regset_name(regset)) + if (var->reg_reservation.reg_type != get_regset_name(regset)) { struct vkd3d_string_buffer *type_string; -@@ -2277,8 +2713,10 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) +@@ -2277,8 +3014,10 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) else { var->regs[regset].allocated = true; @@ -13337,11 +20496,11 @@ index ab59875738c..765b1907426 100644 + var->regs[regset].id = var->reg_reservation.reg_index; + TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, + var->reg_reservation.reg_index, var->reg_reservation.reg_type, -+ var->reg_reservation.reg_index + var->regs[regset].bind_count); ++ var->reg_reservation.reg_index + var->regs[regset].allocation_size); } } } -@@ -2286,9 +2724,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) +@@ -2286,9 +3025,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) /* Compute the earliest and latest liveness for each variable. 
In the case that * a variable is accessed inside of a loop, we promote its liveness to extend @@ -13354,7 +20513,7 @@ index ab59875738c..765b1907426 100644 static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop_first, unsigned int loop_last) { struct hlsl_ir_node *instr; -@@ -2296,7 +2734,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -2296,7 +3035,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { @@ -13363,7 +20522,7 @@ index ab59875738c..765b1907426 100644 switch (instr->type) { -@@ -2311,9 +2749,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -2311,9 +3050,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = store->lhs.var; if (!var->first_write) var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; @@ -13375,7 +20534,7 @@ index ab59875738c..765b1907426 100644 break; } case HLSL_IR_EXPR: -@@ -2322,16 +2760,16 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -2322,16 +3061,16 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop unsigned int i; for (i = 0; i < ARRAY_SIZE(expr->operands) && expr->operands[i].node; ++i) @@ -13396,7 +20555,7 @@ index ab59875738c..765b1907426 100644 break; } case HLSL_IR_LOAD: -@@ -2339,9 +2777,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -2339,9 +3078,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_load *load = hlsl_ir_load(instr); var = load->src.var; @@ -13408,7 +20567,7 @@ index ab59875738c..765b1907426 100644 break; } case HLSL_IR_LOOP: -@@ -2357,22 +2795,30 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -2357,22 +3096,31 @@ static void 
compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); var = load->resource.var; @@ -13428,7 +20587,8 @@ index ab59875738c..765b1907426 100644 } - load->coords.node->last_read = instr->index; -+ load->coords.node->last_read = last_read; ++ if (load->coords.node) ++ load->coords.node->last_read = last_read; if (load->texel_offset.node) - load->texel_offset.node->last_read = instr->index; + load->texel_offset.node->last_read = last_read; @@ -13446,7 +20606,7 @@ index ab59875738c..765b1907426 100644 break; } case HLSL_IR_RESOURCE_STORE: -@@ -2380,18 +2826,26 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -2380,22 +3128,37 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); var = store->resource.var; @@ -13477,8 +20637,20 @@ index ab59875738c..765b1907426 100644 + index->idx.node->last_read = last_read; break; } - case HLSL_IR_CONSTANT: -@@ -2426,127 +2880,142 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl +- case HLSL_IR_CONSTANT: + case HLSL_IR_JUMP: ++ { ++ struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); ++ ++ if (jump->condition.node) ++ jump->condition.node->last_read = last_read; ++ break; ++ } ++ case HLSL_IR_CONSTANT: + break; + } + } +@@ -2426,127 +3189,142 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl compute_liveness_recurse(&entry_func->body, 0, 0); } @@ -13609,7 +20781,7 @@ index ab59875738c..765b1907426 100644 + record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); + + ret.id = reg_idx; -+ ret.bind_count = 1; ++ ret.allocation_size = 1; ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); ret.allocated = true; - liveness->reg_count = max(liveness->reg_count, ret.id + 1); @@ -13661,7 +20833,7 @@ index 
ab59875738c..765b1907426 100644 + record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); + + ret.id = reg_idx; -+ ret.bind_count = align(reg_size, 4) / 4; ++ ret.allocation_size = align(reg_size, 4) / 4; ret.allocated = true; - liveness->reg_count = max(liveness->reg_count, ret.id + align(reg_size, 4)); return ret; @@ -13683,12 +20855,12 @@ index ab59875738c..765b1907426 100644 } static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) -@@ -2565,14 +3034,99 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct +@@ -2565,14 +3343,112 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); } -static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct liveness *liveness) -+static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct hlsl_ir_var *var; @@ -13700,15 +20872,16 @@ index ab59875738c..765b1907426 100644 + + load = hlsl_ir_resource_load(instr); + var = load->resource.var; ++ + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); ++ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) ++ return false; + + if (regset == HLSL_REGSET_SAMPLERS) + { + enum hlsl_sampler_dim dim; + + assert(!load->sampler.var); -+ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) -+ return false; + + dim = var->objects_usage[regset][index].sampler_dim; + if (dim != load->sampling_dim) @@ -13726,25 +20899,39 @@ index ab59875738c..765b1907426 100644 + return false; + } + } -+ var->objects_usage[regset][index].used = true; -+ 
var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + } -+ else ++ var->objects_usage[regset][index].sampler_dim = load->sampling_dim; ++ ++ return false; ++} ++ ++static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_resource_load *load; ++ struct hlsl_ir_var *var; ++ enum hlsl_regset regset; ++ unsigned int index; ++ ++ if (instr->type != HLSL_IR_RESOURCE_LOAD) ++ return false; ++ ++ load = hlsl_ir_resource_load(instr); ++ var = load->resource.var; ++ ++ regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); ++ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) ++ return false; ++ ++ var->objects_usage[regset][index].used = true; ++ var->bind_count[regset] = max(var->bind_count[regset], index + 1); ++ if (load->sampler.var) + { -+ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) ++ var = load->sampler.var; ++ if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) + return false; + -+ var->objects_usage[regset][index].used = true; -+ var->objects_usage[regset][index].sampler_dim = load->sampling_dim; -+ -+ if (load->sampler.var) -+ { -+ var = load->sampler.var; -+ if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) -+ return false; -+ -+ var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; -+ } ++ var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; ++ var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); + } + + return false; @@ -13754,7 +20941,7 @@ index ab59875738c..765b1907426 100644 +{ + struct hlsl_ir_var *var; + struct hlsl_type *type; -+ unsigned int i, k; ++ unsigned int k; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { @@ -13762,12 +20949,10 @@ index ab59875738c..765b1907426 100644 + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { -+ for 
(i = 0; i < type->reg_size[k]; ++i) -+ { -+ /* Samplers are only allocated until the last used one. */ -+ if (var->objects_usage[k][i].used) -+ var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; -+ } ++ bool is_separated = var->is_separated_resource; ++ ++ if (var->bind_count[k] > 0) ++ var->regs[k].allocation_size = (k == HLSL_REGSET_SAMPLERS || is_separated) ? var->bind_count[k] : type->reg_size[k]; + } + } +} @@ -13785,7 +20970,7 @@ index ab59875738c..765b1907426 100644 var->first_write, var->last_read, var->data_type); TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r', -@@ -2580,15 +3134,20 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir +@@ -2580,15 +3456,20 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir } } @@ -13808,7 +20993,7 @@ index ab59875738c..765b1907426 100644 instr->data_type); TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); -@@ -2599,8 +3158,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl +@@ -2599,8 +3480,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); @@ -13819,7 +21004,7 @@ index ab59875738c..765b1907426 100644 break; } -@@ -2609,21 +3168,21 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl +@@ -2609,21 +3490,21 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl struct hlsl_ir_load *load = hlsl_ir_load(instr); /* We need to at least allocate a variable for undefs. * FIXME: We should probably find a way to remove them instead. 
*/ @@ -13844,50 +21029,147 @@ index ab59875738c..765b1907426 100644 break; } -@@ -2633,7 +3192,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl +@@ -2633,9 +3514,33 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl } } -static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) -+static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, -+ struct hlsl_block *block, struct register_allocator *allocator) ++static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f) { struct hlsl_constant_defs *defs = &ctx->constant_defs; ++ struct hlsl_constant_register *reg; ++ size_t i; ++ ++ for (i = 0; i < defs->count; ++i) ++ { ++ reg = &defs->regs[i]; ++ if (reg->index == (component_index / 4)) ++ { ++ reg->value.f[component_index % 4] = f; ++ return; ++ } ++ } ++ ++ if (!hlsl_array_reserve(ctx, (void **)&defs->regs, &defs->size, defs->count + 1, sizeof(*defs->regs))) ++ return; ++ reg = &defs->regs[defs->count++]; ++ memset(reg, 0, sizeof(*reg)); ++ reg->index = component_index / 4; ++ reg->value.f[component_index % 4] = f; ++} ++ ++static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, ++ struct hlsl_block *block, struct register_allocator *allocator) ++{ struct hlsl_ir_node *instr; -@@ -2649,7 +3209,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b - unsigned int x, y, i, writemask, end_reg; - unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) +@@ -2646,66 +3551,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b + { + struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + const struct hlsl_type *type = instr->data_type; +- unsigned int x, y, i, writemask, end_reg; +- unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; ++ 
unsigned int x, i; - constant->reg = allocate_numeric_registers_for_type(ctx, liveness, 1, UINT_MAX, type); + constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); - if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, -@@ -2662,7 +3222,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b - defs->count = end_reg; - } +- if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, +- constant->reg.id + reg_size / 4, sizeof(*defs->values))) +- return; +- end_reg = constant->reg.id + reg_size / 4; +- if (end_reg > defs->count) +- { +- memset(&defs->values[defs->count], 0, sizeof(*defs->values) * (end_reg - defs->count)); +- defs->count = end_reg; +- } ++ assert(type->class <= HLSL_CLASS_LAST_NUMERIC); ++ assert(type->dimy == 1); ++ assert(constant->reg.writemask); - assert(type->type <= HLSL_CLASS_LAST_NUMERIC); -+ assert(type->class <= HLSL_CLASS_LAST_NUMERIC); ++ for (x = 0, i = 0; x < 4; ++x) ++ { ++ const union hlsl_constant_value_component *value; ++ float f; - if (!(writemask = constant->reg.writemask)) - writemask = (1u << type->dimx) - 1; -@@ -2671,12 +3231,12 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b - { - for (x = 0, i = 0; x < 4; ++x) +- if (!(writemask = constant->reg.writemask)) +- writemask = (1u << type->dimx) - 1; ++ if (!(constant->reg.writemask & (1u << x))) ++ continue; ++ value = &constant->value.u[i++]; + +- for (y = 0; y < type->dimy; ++y) +- { +- for (x = 0, i = 0; x < 4; ++x) ++ switch (type->base_type) { - const union hlsl_constant_value *value; -+ const union hlsl_constant_value_component *value; - float f; - - if (!(writemask & (1u << x))) - continue; +- float f; +- +- if (!(writemask & (1u << x))) +- continue; - value = &constant->value[i++]; -+ value = &constant->value.u[i++]; +- +- switch (type->base_type) +- { +- 
case HLSL_TYPE_BOOL: +- f = !!value->u; +- break; +- +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- f = value->f; +- break; +- +- case HLSL_TYPE_INT: +- f = value->i; +- break; +- +- case HLSL_TYPE_UINT: +- f = value->u; +- break; +- +- case HLSL_TYPE_DOUBLE: +- FIXME("Double constant.\n"); +- return; +- +- default: +- vkd3d_unreachable(); +- } +- defs->values[constant->reg.id + y].f[x] = f; ++ case HLSL_TYPE_BOOL: ++ f = !!value->u; ++ break; ++ ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ f = value->f; ++ break; ++ ++ case HLSL_TYPE_INT: ++ f = value->i; ++ break; ++ ++ case HLSL_TYPE_UINT: ++ f = value->u; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ FIXME("Double constant.\n"); ++ return; ++ ++ default: ++ vkd3d_unreachable(); + } ++ ++ record_constant(ctx, constant->reg.id * 4 + x, f); + } - switch (type->base_type) - { -@@ -2714,15 +3274,15 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b + break; +@@ -2714,15 +3605,15 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); @@ -13906,7 +21188,7 @@ index ab59875738c..765b1907426 100644 break; } -@@ -2734,10 +3294,10 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b +@@ -2734,11 +3625,9 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { @@ -13915,11 +21197,11 @@ index ab59875738c..765b1907426 100644 struct hlsl_ir_var *var; - allocate_const_registers_recurse(ctx, &entry_func->body, &liveness); -+ allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); - +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { -@@ -2748,12 +3308,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + if (var->is_uniform && var->last_read) +@@ -2748,12 
+3637,16 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi if (reg_size == 0) continue; @@ -13931,11 +21213,13 @@ index ab59875738c..765b1907426 100644 } } + ++ allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); ++ + vkd3d_free(allocator.allocations); } /* Simple greedy temporary register allocation pass that just assigns a unique -@@ -2762,15 +3324,33 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi +@@ -2762,15 +3655,33 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi * does not handle constants. */ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { @@ -13974,7 +21258,7 @@ index ab59875738c..765b1907426 100644 { [VKD3D_SHADER_TYPE_PIXEL] = "Pixel", [VKD3D_SHADER_TYPE_VERTEX] = "Vertex", -@@ -2791,7 +3371,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -2791,7 +3702,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var D3DDECLUSAGE usage; uint32_t usage_idx; @@ -13988,7 +21272,7 @@ index ab59875738c..765b1907426 100644 { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); -@@ -2800,8 +3385,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -2800,8 +3716,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var if ((!output && !var->last_read) || (output && !var->first_write)) return; @@ -13997,15 +21281,15 @@ index ab59875738c..765b1907426 100644 } else { -@@ -2827,6 +3410,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -2827,6 +3741,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var { var->regs[HLSL_REGSET_NUMERIC].allocated = true; var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; -+ 
var->regs[HLSL_REGSET_NUMERIC].bind_count = 1; ++ var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); -@@ -2853,23 +3437,117 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 +@@ -2853,23 +3768,117 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) { @@ -14075,7 +21359,7 @@ index ab59875738c..765b1907426 100644 + + LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { -+ if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) ++ if (!var1->is_uniform || hlsl_type_is_resource(var1->data_type)) + continue; + + buffer = var1->buffer; @@ -14086,7 +21370,7 @@ index ab59875738c..765b1907426 100644 + { + unsigned int var1_reg_size, var2_reg_size; + -+ if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) ++ if (!var2->is_uniform || hlsl_type_is_resource(var2->data_type)) + continue; + + if (var1 == var2 || var1->buffer != var2->buffer) @@ -14129,12 +21413,12 @@ index ab59875738c..765b1907426 100644 } static void allocate_buffers(struct hlsl_ctx *ctx) -@@ -2880,15 +3558,17 @@ static void allocate_buffers(struct hlsl_ctx *ctx) +@@ -2880,15 +3889,17 @@ static void allocate_buffers(struct hlsl_ctx *ctx) LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->data_type->type != HLSL_CLASS_OBJECT) -+ if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) ++ if (var->is_uniform && !hlsl_type_is_resource(var->data_type)) { if (var->is_param) var->buffer = ctx->params_buffer; @@ -14149,7 +21433,7 @@ index ab59875738c..765b1907426 100644 LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) { if 
(!buffer->used_size) -@@ -2896,28 +3576,30 @@ static void allocate_buffers(struct hlsl_ctx *ctx) +@@ -2896,28 +3907,30 @@ static void allocate_buffers(struct hlsl_ctx *ctx) if (buffer->type == HLSL_BUFFER_CONSTANT) { @@ -14171,7 +21455,7 @@ index ab59875738c..765b1907426 100644 - buffer->reg.id = buffer->reservation.index; + buffer->reg.id = buffer->reservation.reg_index; -+ buffer->reg.bind_count = 1; ++ buffer->reg.allocation_size = 1; buffer->reg.allocated = true; TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); } @@ -14182,12 +21466,16 @@ index ab59875738c..765b1907426 100644 ++index; buffer->reg.id = index; -+ buffer->reg.bind_count = 1; ++ buffer->reg.allocation_size = 1; buffer->reg.allocated = true; TRACE("Allocated %s to cb%u.\n", buffer->name, index); ++index; -@@ -2939,13 +3621,29 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - uint32_t index) +@@ -2936,16 +3949,35 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + } + + static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, +- uint32_t index) ++ uint32_t index, bool allocated_only) { const struct hlsl_ir_var *var; + unsigned int start, count; @@ -14202,11 +21490,14 @@ index ab59875738c..765b1907426 100644 + * bound there even if the reserved vars aren't used. 
*/ + start = var->reg_reservation.reg_index; + count = var->data_type->reg_size[regset]; ++ ++ if (!var->regs[regset].allocated && allocated_only) ++ continue; + } + else if (var->regs[regset].allocated) + { + start = var->regs[regset].id; -+ count = var->regs[regset].bind_count; ++ count = var->regs[regset].allocation_size; + } + else + { @@ -14218,7 +21509,7 @@ index ab59875738c..765b1907426 100644 return var; } return NULL; -@@ -2956,7 +3654,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) +@@ -2956,7 +3988,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) char regset_name = get_regset_name(regset); struct hlsl_ir_var *var; uint32_t min_index = 0; @@ -14226,7 +21517,7 @@ index ab59875738c..765b1907426 100644 if (regset == HLSL_REGSET_UAVS) { -@@ -2968,19 +3665,17 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) +@@ -2968,19 +3999,18 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) } } @@ -14235,11 +21526,12 @@ index ab59875738c..765b1907426 100644 LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var->last_read || !var->data_type->reg_size[regset]) -+ unsigned int count = var->regs[regset].bind_count; ++ unsigned int count = var->regs[regset].allocation_size; + + if (count == 0) continue; ++ /* The variable was already allocated if it has a reservation. 
*/ if (var->regs[regset].allocated) { - const struct hlsl_ir_var *reserved_object; @@ -14251,7 +21543,7 @@ index ab59875738c..765b1907426 100644 if (var->regs[regset].id < min_index) { -@@ -2988,28 +3683,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) +@@ -2988,28 +4018,47 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "UAV index (%u) must be higher than the maximum render target index (%u).", var->regs[regset].id, min_index - 1); @@ -14272,7 +21564,10 @@ index ab59875738c..765b1907426 100644 - var->regs[regset].id = var->reg_reservation.index; - var->regs[regset].allocated = true; - TRACE("Allocated reserved %s to %c%u.\n", var->name, regset_name, var->regs[regset].id); -+ reserved_object = get_allocated_object(ctx, regset, index); ++ /* get_allocated_object() may return "var" itself, but we ++ * actually want that, otherwise we'll end up reporting the ++ * same conflict between the same two variables twice. */ ++ reserved_object = get_allocated_object(ctx, regset, index, true); + if (reserved_object && reserved_object != var && reserved_object != last_reported) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, @@ -14292,7 +21587,7 @@ index ab59875738c..765b1907426 100644 + + while (available < count) + { -+ if (get_allocated_object(ctx, regset, index)) ++ if (get_allocated_object(ctx, regset, index, false)) + available = 0; + else + ++available; @@ -14308,7 +21603,7 @@ index ab59875738c..765b1907426 100644 ++index; } } -@@ -3034,12 +3745,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl +@@ -3034,12 +4083,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return false; /* We should always have generated a cast to UINT. 
*/ @@ -14324,7 +21619,7 @@ index ab59875738c..765b1907426 100644 { case HLSL_CLASS_VECTOR: if (idx >= type->dimx) -@@ -3090,6 +3801,55 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl +@@ -3090,9 +4139,59 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return true; } @@ -14380,7 +21675,11 @@ index ab59875738c..765b1907426 100644 bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node; -@@ -3102,13 +3862,13 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref ++ enum hlsl_regset regset; + unsigned int size; + + if (!offset_node) +@@ -3102,15 +4201,16 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref } /* We should always have generated a cast to UINT. */ @@ -14393,10 +21692,24 @@ index ab59875738c..765b1907426 100644 - *offset = hlsl_ir_constant(offset_node)->value[0].u; + *offset = hlsl_ir_constant(offset_node)->value.u[0].u; ++ regset = hlsl_type_get_regset(deref->data_type); - size = deref->var->data_type->reg_size[deref->offset_regset]; +- size = deref->var->data_type->reg_size[deref->offset_regset]; ++ size = deref->var->data_type->reg_size[regset]; if (*offset >= size) -@@ -3170,7 +3930,7 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a + { + hlsl_error(ctx, &deref->offset.node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, +@@ -3140,7 +4240,8 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere + struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + +- assert(deref->offset_regset == HLSL_REGSET_NUMERIC); ++ assert(deref->data_type); ++ assert(deref->data_type->class <= HLSL_CLASS_LAST_NUMERIC); + + ret.id += offset / 4; + +@@ -3170,7 +4271,7 @@ static void 
parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a const struct hlsl_type *type = instr->data_type; const struct hlsl_ir_constant *constant; @@ -14405,7 +21718,7 @@ index ab59875738c..765b1907426 100644 || (type->base_type != HLSL_TYPE_INT && type->base_type != HLSL_TYPE_UINT)) { struct vkd3d_string_buffer *string; -@@ -3190,15 +3950,34 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a +@@ -3190,15 +4291,34 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } constant = hlsl_ir_constant(instr); @@ -14443,7 +21756,7 @@ index ab59875738c..765b1907426 100644 int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) { -@@ -3209,10 +3988,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -3209,10 +4329,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry unsigned int i; bool progress; @@ -14456,25 +21769,32 @@ index ab59875738c..765b1907426 100644 vkd3d_free(recursive_call_ctx.backtrace); /* Avoid going into an infinite loop when processing call instructions. 
-@@ -3222,7 +4001,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -3222,41 +4342,51 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_return(ctx, entry_func, body, false); - while (transform_ir(ctx, lower_calls, body, NULL)); + while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); + ++ lower_ir(ctx, lower_matrix_swizzles, body); + hlsl_transform_ir(ctx, lower_index_loads, body, NULL); LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { -@@ -3234,15 +4015,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) +- prepend_uniform_copy(ctx, &body->instrs, var); ++ prepend_uniform_copy(ctx, body, var); + } + + for (i = 0; i < entry_func->parameters.count; ++i) { var = entry_func->parameters.vars[i]; - if (var->data_type->type == HLSL_CLASS_OBJECT || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) { - prepend_uniform_copy(ctx, &body->instrs, var); +- prepend_uniform_copy(ctx, &body->instrs, var); ++ prepend_uniform_copy(ctx, body, var); } else { @@ -14491,8 +21811,12 @@ index ab59875738c..765b1907426 100644 + } if (var->storage_modifiers & HLSL_STORAGE_IN) - prepend_input_var_copy(ctx, &body->instrs, var); -@@ -3252,7 +4040,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +- prepend_input_var_copy(ctx, &body->instrs, var); ++ prepend_input_var_copy(ctx, body, var); + if (var->storage_modifiers & HLSL_STORAGE_OUT) +- append_output_var_copy(ctx, &body->instrs, var); ++ append_output_var_copy(ctx, body, var); + } } if (entry_func->return_var) { @@ -14501,12 +21825,21 @@ index ab59875738c..765b1907426 100644 hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Entry point \"%s\" is missing a return value semantic.", 
entry_func->func->name); -@@ -3274,60 +4062,71 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +- append_output_var_copy(ctx, &body->instrs, entry_func->return_var); ++ append_output_var_copy(ctx, body, entry_func->return_var); + } + + for (i = 0; i < entry_func->attr_count; ++i) +@@ -3274,60 +4404,81 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); - transform_ir(ctx, lower_broadcasts, body, NULL); - while (transform_ir(ctx, fold_redundant_casts, body, NULL)); ++ if (profile->major_version >= 4) ++ { ++ hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); ++ } + hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); + while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); do @@ -14530,10 +21863,10 @@ index ab59875738c..765b1907426 100644 + hlsl_transform_ir(ctx, lower_narrowing_casts, body, NULL); + hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); + hlsl_transform_ir(ctx, lower_int_dot, body, NULL); -+ hlsl_transform_ir(ctx, lower_int_division, body, NULL); -+ hlsl_transform_ir(ctx, lower_int_modulus, body, NULL); ++ lower_ir(ctx, lower_int_division, body); ++ lower_ir(ctx, lower_int_modulus, body); + hlsl_transform_ir(ctx, lower_int_abs, body, NULL); -+ hlsl_transform_ir(ctx, lower_float_modulus, body, NULL); ++ lower_ir(ctx, lower_float_modulus, body); + hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); do { @@ -14554,6 +21887,15 @@ index ab59875738c..765b1907426 100644 + hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); + hlsl_transform_ir(ctx, lower_int_dot, body, NULL); + ++ hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); ++ hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); ++ if (profile->major_version >= 4) ++ hlsl_transform_ir(ctx, 
lower_combined_samples, body, NULL); ++ hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); ++ sort_synthetic_separated_samplers_first(ctx); ++ ++ if (profile->major_version >= 4) ++ hlsl_transform_ir(ctx, lower_ternary, body, NULL); if (profile->major_version < 4) { - transform_ir(ctx, lower_division, body, NULL); @@ -14572,9 +21914,7 @@ index ab59875738c..765b1907426 100644 } - transform_ir(ctx, validate_static_object_references, body, NULL); -+ hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); -+ hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); - +- /* TODO: move forward, remove when no longer needed */ - transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); - while (transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); @@ -14599,7 +21939,7 @@ index ab59875738c..765b1907426 100644 if (profile->major_version < 4) { diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 3210bbd5712..301113c8477 100644 +index 3210bbd5712..41a72ab6c0d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -22,7 +22,49 @@ @@ -14653,7 +21993,7 @@ index 3210bbd5712..301113c8477 100644 { unsigned int k; uint32_t u; -@@ -30,11 +72,11 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct +@@ -30,75 +72,75 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct double d; float f; @@ -14668,7 +22008,10 @@ index 3210bbd5712..301113c8477 100644 return false; } -@@ -44,61 +86,61 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct +- for (k = 0; k < 4; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { + switch (src->node.data_type->base_type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: @@ -14755,29 +22098,78 @@ index 3210bbd5712..301113c8477 100644 break; case HLSL_TYPE_BOOL: -@@ -110,9 +152,10 @@ static bool 
fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct +@@ -110,398 +152,728 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct return true; } -static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) -+static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, -+ const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) ++static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k; assert(type == src->node.data_type->base_type); -@@ -123,30 +166,30 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct + +- for (k = 0; k < 4; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = -src->value[k].f; -+ dst->u[k].f = -src->value.u[k].f; ++ if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, ++ "Indefinite logarithm result."); ++ } ++ dst->u[k].f = log2f(src->value.u[k].f); ++ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, ++ "Infinities and NaNs are not allowed by the shader model."); ++ } break; case HLSL_TYPE_DOUBLE: - dst->value[k].d = -src->value[k].d; ++ if (src->value.u[k].d < 0.0) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, ++ "Indefinite logarithm result."); ++ } ++ dst->u[k].d = log2(src->value.u[k].d); ++ break; ++ ++ default: ++ FIXME("Fold 'log2' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++static bool 
fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, ++ const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = -src->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: + dst->u[k].d = -src->value.u[k].d; break; @@ -14798,6 +22190,110 @@ index 3210bbd5712..301113c8477 100644 -static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src1, - struct hlsl_ir_constant *src2) ++static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) + { +- enum hlsl_base_type type = dst->node.data_type->base_type; ++ enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(type == src->node.data_type->base_type); + +- for (k = 0; k < 4; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +- dst->value[k].f = src1->value[k].f + src2->value[k].f; ++ if (ctx->profile->major_version >= 4 && src->value.u[k].f == 0.0f) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, ++ "Floating point division by zero."); ++ } ++ dst->u[k].f = 1.0f / src->value.u[k].f; ++ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, ++ "Infinities and NaNs are not allowed by the shader model."); ++ } + break; + + case HLSL_TYPE_DOUBLE: +- dst->value[k].d = src1->value[k].d + src2->value[k].d; ++ if (src->value.u[k].d == 0.0) ++ { ++ hlsl_warning(ctx, loc, 
VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, ++ "Floating point division by zero."); ++ } ++ dst->u[k].d = 1.0 / src->value.u[k].d; + break; + +- /* Handling HLSL_TYPE_INT through the unsigned field to avoid +- * undefined behavior with signed integers in C. */ +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- dst->value[k].u = src1->value[k].u + src2->value[k].u; ++ default: ++ FIXME("Fold 'rcp' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, ++ "Imaginary square root result."); ++ } ++ dst->u[k].f = sqrtf(src->value.u[k].f); ++ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, ++ "Infinities and NaNs are not allowed by the shader model."); ++ } ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ if (src->value.u[k].d < 0.0) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, ++ "Imaginary square root result."); ++ } ++ dst->u[k].d = sqrt(src->value.u[k].d); + break; + + default: +- FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); ++ FIXME("Fold 'sqrt' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } ++ + return true; + } + +-static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, +- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static 
bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { @@ -14806,68 +22302,35 @@ index 3210bbd5712..301113c8477 100644 unsigned int k; assert(type == src1->node.data_type->base_type); -@@ -158,32 +201,32 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -- dst->value[k].f = src1->value[k].f + src2->value[k].f; -+ dst->u[k].f = src1->value.u[k].f + src2->value.u[k].f; - break; + assert(type == src2->node.data_type->base_type); - case HLSL_TYPE_DOUBLE: -- dst->value[k].d = src1->value[k].d + src2->value[k].d; -+ dst->u[k].d = src1->value.u[k].d + src2->value.u[k].d; - break; - - /* Handling HLSL_TYPE_INT through the unsigned field to avoid - * undefined behavior with signed integers in C. */ - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->value[k].u = src1->value[k].u + src2->value[k].u; -+ dst->u[k].u = src1->value.u[k].u + src2->value.u[k].u; - break; - - default: -- FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - - assert(type == src1->node.data_type->base_type); -@@ -195,32 +238,32 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, +- for (k = 0; k < 4; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) { 
case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = src1->value[k].f * src2->value[k].f; -+ dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; ++ dst->u[k].f = src1->value.u[k].f + src2->value.u[k].f; break; case HLSL_TYPE_DOUBLE: - dst->value[k].d = src1->value[k].d * src2->value[k].d; -+ dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; ++ dst->u[k].d = src1->value.u[k].d + src2->value.u[k].d; break; ++ /* Handling HLSL_TYPE_INT through the unsigned field to avoid ++ * undefined behavior with signed integers in C. */ case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u * src2->value[k].u; -+ dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; ++ dst->u[k].u = src1->value.u[k].u + src2->value.u[k].u; break; default: - FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } @@ -14876,51 +22339,131 @@ index 3210bbd5712..301113c8477 100644 -static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { ++ enum hlsl_base_type type = dst_type->base_type; unsigned int k; - assert(dst->node.data_type->base_type == HLSL_TYPE_BOOL); -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->base_type == src2->node.data_type->base_type); +- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); - for (k = 0; 
k < 4; ++k) -@@ -229,270 +272,270 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, +- for (k = 0; k < 4; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { +- switch (src1->node.data_type->base_type) ++ switch (type) { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: - dst->value[k].u = src1->value[k].f != src2->value[k].f; -+ dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; break; - case HLSL_TYPE_DOUBLE: +- case HLSL_TYPE_DOUBLE: - dst->value[k].u = src1->value[k].d != src2->value[k].d; -+ dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; - break; +- break; ++ default: ++ FIXME("Fold bit/logic and for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: - dst->value[k].u = src1->value[k].u != src2->value[k].u; -+ dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; ++ dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; break; default: - vkd3d_unreachable(); +- vkd3d_unreachable(); ++ FIXME("Fold bit/logic or for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ enum 
hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; ++ break; ++ ++ default: ++ FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; } ++ } ++ return true; ++} - dst->value[k].u *= ~0u; -+ dst->u[k].u *= ~0u; ++static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); ++ assert(src1->node.data_type->dimx == src2->node.data_type->dimx); ++ ++ dst->u[0].f = 0.0f; ++ for (k = 0; k < src1->node.data_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; ++ break; ++ default: ++ FIXME("Fold 'dot' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } } ++ return true; } -static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, -+ const struct vkd3d_shader_location *loc) ++static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum 
hlsl_base_type type = dst_type->base_type; @@ -14928,15 +22471,46 @@ index 3210bbd5712..301113c8477 100644 assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type); ++ assert(type == src3->node.data_type->base_type); ++ assert(src1->node.data_type->dimx == src2->node.data_type->dimx); ++ assert(src3->node.data_type->dimx == 1); - for (k = 0; k < dst->node.data_type->dimx; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) ++ dst->u[0].f = src3->value.u[0].f; ++ for (k = 0; k < src1->node.data_type->dimx; ++k) { switch (type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - if (ctx->profile->major_version >= 4 && src2->value[k].f == 0) ++ dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; ++ break; ++ default: ++ FIXME("Fold 'dp2add' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, ++ const struct vkd3d_shader_location *loc) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: + if (ctx->profile->major_version >= 4 && src2->value.u[k].f == 0) { - hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, @@ -15008,6 +22582,246 @@ index 3210bbd5712..301113c8477 100644 -static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) ++static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = 
dst->node.data_type->base_type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + +- for (k = 0; k < dst->node.data_type->dimx; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { +- switch (type) ++ switch (src1->node.data_type->base_type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].u = src1->value.u[k].f == src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].u = src1->value.u[k].d == src2->value.u[k].d; ++ break; ++ + case HLSL_TYPE_INT: +- if (src2->value[k].i == 0) +- { +- hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, +- "Division by zero."); +- return false; +- } +- if (src1->value[k].i == INT_MIN && src2->value[k].i == -1) +- dst->value[k].i = 0; +- else +- dst->value[k].i = src1->value[k].i % src2->value[k].i; ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ dst->u[k].u *= ~0u; ++ } ++ return true; ++} ++ ++static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ unsigned int k; ++ ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (src1->node.data_type->base_type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].u = src1->value.u[k].f >= src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].u = src1->value.u[k].d >= src2->value.u[k].d; ++ break; ++ ++ case HLSL_TYPE_INT: ++ dst->u[k].u = src1->value.u[k].i >= src2->value.u[k].i; + break; + + case HLSL_TYPE_UINT: +- if 
(src2->value[k].u == 0) +- { +- hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, +- "Division by zero."); +- return false; +- } +- dst->value[k].u = src1->value[k].u % src2->value[k].u; ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; + break; + + default: +- FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); +- return false; ++ vkd3d_unreachable(); + } ++ ++ dst->u[k].u *= ~0u; + } + return true; + } + +-static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, +- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) ++static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst->node.data_type->base_type; + unsigned int k; + +- assert(type == src1->node.data_type->base_type); +- assert(type == src2->node.data_type->base_type); ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + +- for (k = 0; k < dst->node.data_type->dimx; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { +- switch (type) ++ switch (src1->node.data_type->base_type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].u = src1->value.u[k].f < src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].u = src1->value.u[k].d < src2->value.u[k].d; ++ break; ++ + case HLSL_TYPE_INT: +- dst->value[k].i = max(src1->value[k].i, src2->value[k].i); ++ dst->u[k].u = src1->value.u[k].i < src2->value.u[k].i; + break; + + case HLSL_TYPE_UINT: +- dst->value[k].u = max(src1->value[k].u, src2->value[k].u); ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; + break; + + default: +- FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); +- return false; ++ vkd3d_unreachable(); + } ++ ++ 
dst->u[k].u *= ~0u; + } + return true; + } + +-static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, +- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) ++static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst->node.data_type->base_type; ++ enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + +- for (k = 0; k < dst->node.data_type->dimx; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = fmaxf(src1->value.u[k].f, src2->value.u[k].f); ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].d = fmax(src1->value.u[k].d, src2->value.u[k].d); ++ break; ++ + case HLSL_TYPE_INT: +- dst->value[k].i = min(src1->value[k].i, src2->value[k].i); ++ dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); + break; + + case HLSL_TYPE_UINT: +- dst->value[k].u = min(src1->value[k].u, src2->value[k].u); ++ dst->u[k].u = max(src1->value.u[k].u, src2->value.u[k].u); + break; + + default: +- FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); ++ FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; + } + +-static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, +- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) ++static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst->node.data_type->base_type; ++ enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); 
+ assert(type == src2->node.data_type->base_type); + +- for (k = 0; k < dst->node.data_type->dimx; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = fminf(src1->value.u[k].f, src2->value.u[k].f); ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].d = fmin(src1->value.u[k].d, src2->value.u[k].d); ++ break; ++ + case HLSL_TYPE_INT: ++ dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); ++ break; ++ + case HLSL_TYPE_UINT: +- dst->value[k].u = src1->value[k].u ^ src2->value[k].u; ++ dst->u[k].u = min(src1->value.u[k].u, src2->value.u[k].u); + break; + + default: +- FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); ++ FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; + } + +-static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, +- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) @@ -15025,175 +22839,30 @@ index 3210bbd5712..301113c8477 100644 switch (type) { case HLSL_TYPE_INT: -- if (src2->value[k].i == 0) + if (src2->value.u[k].i == 0) - { -- hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, -- "Division by zero."); ++ { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); - return false; - } -- if (src1->value[k].i == INT_MIN && src2->value[k].i == -1) -- dst->value[k].i = 0; ++ return false; ++ } + if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) + dst->u[k].i = 0; - else -- dst->value[k].i = src1->value[k].i % src2->value[k].i; ++ else + dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; - break; - ++ break; ++ case HLSL_TYPE_UINT: -- if 
(src2->value[k].u == 0) +- dst->value[k].u = src1->value[k].u & src2->value[k].u; + if (src2->value.u[k].u == 0) - { -- hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, -- "Division by zero."); ++ { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); - return false; - } -- dst->value[k].u = src1->value[k].u % src2->value[k].u; ++ return false; ++ } + dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; break; - default: -- FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < dst->node.data_type->dimx; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { - case HLSL_TYPE_INT: -- dst->value[k].i = max(src1->value[k].i, src2->value[k].i); -+ dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); - break; - - case HLSL_TYPE_UINT: -- dst->value[k].u = max(src1->value[k].u, src2->value[k].u); -+ dst->u[k].u = max(src1->value.u[k].u, src2->value.u[k].u); - break; - - default: -- FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant 
*src1, struct hlsl_ir_constant *src2) -+static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < dst->node.data_type->dimx; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { - case HLSL_TYPE_INT: -- dst->value[k].i = min(src1->value[k].i, src2->value[k].i); -+ dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); - break; - - case HLSL_TYPE_UINT: -- dst->value[k].u = min(src1->value[k].u, src2->value[k].u); -+ dst->u[k].u = min(src1->value.u[k].u, src2->value.u[k].u); - break; - - default: -- FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < dst->node.data_type->dimx; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->value[k].u = src1->value[k].u ^ src2->value[k].u; -+ dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; - break; - - default: -- FIXME("Fold bit 
xor for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < dst->node.data_type->dimx; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->value[k].u = src1->value[k].u & src2->value[k].u; -+ dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; - break; - default: - FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } @@ -15202,7 +22871,7 @@ index 3210bbd5712..301113c8477 100644 -static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; @@ -15217,30 +22886,76 @@ index 3210bbd5712..301113c8477 100644 { switch (type) { ++ case HLSL_TYPE_FLOAT: ++ case 
HLSL_TYPE_HALF: ++ dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; ++ break; ++ case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u | src2->value[k].u; -+ dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; ++ dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; break; default: - FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } -@@ -501,7 +544,9 @@ static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, + return true; + } ++static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ unsigned int k; ++ ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (src1->node.data_type->base_type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; ++ break; ++ ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ dst->u[k].u *= ~0u; ++ } ++ return true; ++} ++ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_constant *arg1, *arg2 = NULL, *res; -+ struct hlsl_ir_constant *arg1, *arg2 = NULL; ++ struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; + struct hlsl_constant_value res = {0}; + struct 
hlsl_ir_node *res_node; struct hlsl_ir_expr *expr; unsigned int i; bool success; -@@ -512,7 +557,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -512,7 +884,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, if (!expr->operands[0].node) return false; @@ -15249,7 +22964,7 @@ index 3210bbd5712..301113c8477 100644 return false; for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) -@@ -521,64 +566,65 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -521,64 +893,101 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, { if (expr->operands[i].node->type != HLSL_IR_CONSTANT) return false; @@ -15260,10 +22975,12 @@ index 3210bbd5712..301113c8477 100644 arg1 = hlsl_ir_constant(expr->operands[0].node); if (expr->operands[1].node) arg2 = hlsl_ir_constant(expr->operands[1].node); - +- - if (!(res = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) - return false; -- ++ if (expr->operands[2].node) ++ arg3 = hlsl_ir_constant(expr->operands[2].node); + switch (expr->op) { + case HLSL_OP1_ABS: @@ -15273,11 +22990,23 @@ index 3210bbd5712..301113c8477 100644 case HLSL_OP1_CAST: - success = fold_cast(ctx, res, arg1); + success = fold_cast(ctx, &res, instr->data_type, arg1); ++ break; ++ ++ case HLSL_OP1_LOG2: ++ success = fold_log2(ctx, &res, instr->data_type, arg1, &instr->loc); break; case HLSL_OP1_NEG: - success = fold_neg(ctx, res, arg1); + success = fold_neg(ctx, &res, instr->data_type, arg1); ++ break; ++ ++ case HLSL_OP1_RCP: ++ success = fold_rcp(ctx, &res, instr->data_type, arg1, &instr->loc); ++ break; ++ ++ case HLSL_OP1_SQRT: ++ success = fold_sqrt(ctx, &res, instr->data_type, arg1, &instr->loc); break; case HLSL_OP2_ADD: @@ -15285,14 +23014,26 @@ index 3210bbd5712..301113c8477 100644 + success = fold_add(ctx, &res, instr->data_type, arg1, arg2); break; - case HLSL_OP2_MUL: +- case HLSL_OP2_MUL: - success = fold_mul(ctx, res, 
arg1, arg2); -+ success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_BIT_AND: ++ case HLSL_OP2_LOGIC_AND: ++ success = fold_and(ctx, &res, instr->data_type, arg1, arg2); break; - case HLSL_OP2_NEQUAL: +- case HLSL_OP2_NEQUAL: - success = fold_nequal(ctx, res, arg1, arg2); -+ success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_BIT_OR: ++ case HLSL_OP2_LOGIC_OR: ++ success = fold_or(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP2_BIT_XOR: ++ success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP2_DOT: ++ success = fold_dot(ctx, &res, instr->data_type, arg1, arg2); break; case HLSL_OP2_DIV: @@ -15300,9 +23041,18 @@ index 3210bbd5712..301113c8477 100644 + success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break; - case HLSL_OP2_MOD: +- case HLSL_OP2_MOD: - success = fold_mod(ctx, res, arg1, arg2); -+ success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); ++ case HLSL_OP2_EQUAL: ++ success = fold_equal(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP2_GEQUAL: ++ success = fold_gequal(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP2_LESS: ++ success = fold_less(ctx, &res, instr->data_type, arg1, arg2); break; case HLSL_OP2_MAX: @@ -15315,23 +23065,30 @@ index 3210bbd5712..301113c8477 100644 + success = fold_min(ctx, &res, instr->data_type, arg1, arg2); break; - case HLSL_OP2_BIT_XOR: +- case HLSL_OP2_BIT_XOR: - success = fold_bit_xor(ctx, res, arg1, arg2); -+ success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_MOD: ++ success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break; - case HLSL_OP2_BIT_AND: +- case HLSL_OP2_BIT_AND: - success = fold_bit_and(ctx, res, arg1, arg2); -+ success = fold_bit_and(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_MUL: ++ success = fold_mul(ctx, &res, instr->data_type, arg1, 
arg2); break; - case HLSL_OP2_BIT_OR: +- case HLSL_OP2_BIT_OR: - success = fold_bit_or(ctx, res, arg1, arg2); -+ success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); ++ case HLSL_OP2_NEQUAL: ++ success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ ++ case HLSL_OP3_DP2ADD: ++ success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); break; default: -@@ -589,20 +635,20 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +@@ -589,20 +998,20 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, if (success) { @@ -15359,7 +23116,7 @@ index 3210bbd5712..301113c8477 100644 unsigned int i; if (instr->type != HLSL_IR_SWIZZLE) -@@ -610,15 +656,15 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst +@@ -610,15 +1019,15 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst swizzle = hlsl_ir_swizzle(instr); if (swizzle->val.node->type != HLSL_IR_CONSTANT) return false; @@ -18907,10 +26664,10 @@ index 553a75818e7..00000000000 -} diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c new file mode 100644 -index 00000000000..9eefb82c226 +index 00000000000..d2bfb933edc --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -0,0 +1,1072 @@ +@@ -0,0 +1,1294 @@ +/* + * Copyright 2023 Conor McCarthy for CodeWeavers + * @@ -18998,6 +26755,72 @@ index 00000000000..9eefb82c226 + shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); +} + ++static const struct vkd3d_shader_varying_map *find_varying_map( ++ const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < varying_map->varying_count; ++i) ++ { ++ if (varying_map->varying_map[i].output_signature_index == signature_idx) ++ return &varying_map->varying_map[i]; ++ } ++ ++ return NULL; ++} ++ ++static enum vkd3d_result 
remap_output_signature(struct vkd3d_shader_parser *parser, ++ const struct vkd3d_shader_compile_info *compile_info) ++{ ++ struct shader_signature *signature = &parser->shader_desc.output_signature; ++ const struct vkd3d_shader_varying_map_info *varying_map; ++ unsigned int i; ++ ++ if (!(varying_map = vkd3d_find_struct(compile_info->next, VARYING_MAP_INFO))) ++ return VKD3D_OK; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ const struct vkd3d_shader_varying_map *map = find_varying_map(varying_map, i); ++ struct signature_element *e = &signature->elements[i]; ++ ++ if (map) ++ { ++ unsigned int input_mask = map->input_mask; ++ ++ e->target_location = map->input_register_index; ++ ++ /* It is illegal in Vulkan if the next shader uses the same varying ++ * location with a different mask. */ ++ if (input_mask && input_mask != e->mask) ++ { ++ vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Aborting due to not yet implemented feature: " ++ "Output mask %#x does not match input mask %#x.", ++ e->mask, input_mask); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ } ++ else ++ { ++ e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; ++ } ++ } ++ ++ for (i = 0; i < varying_map->varying_count; ++i) ++ { ++ if (varying_map->varying_map[i].output_signature_index >= signature->element_count) ++ { ++ vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Aborting due to not yet implemented feature: " ++ "The next stage consumes varyings not written by this stage."); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ +struct hull_flattener +{ + struct vkd3d_shader_instruction_array instructions; @@ -19143,7 +26966,7 @@ index 00000000000..9eefb82c226 + return VKD3D_OK; +} + -+static void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, ++void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type 
reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) +{ + reg->type = reg_type; @@ -19160,13 +26983,13 @@ index 00000000000..9eefb82c226 + reg->immconst_type = VKD3D_IMMCONST_SCALAR; +} + -+static void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) ++void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) +{ + memset(ins, 0, sizeof(*ins)); + ins->handler_idx = handler_idx; +} + -+enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) ++static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) +{ + struct hull_flattener flattener = {*src_instructions}; + struct vkd3d_shader_instruction_array *instructions; @@ -19301,7 +27124,7 @@ index 00000000000..9eefb82c226 + return VKD3D_OK; +} + -+enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( ++static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( + struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) +{ + struct vkd3d_shader_instruction_array *instructions; @@ -19912,7 +27735,7 @@ index 00000000000..9eefb82c226 + shader_instruction_init(ins, VKD3DSIH_NOP); +} + -+enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, ++static enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, + enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, + struct shader_signature *output_signature, struct shader_signature *patch_constant_signature) +{ @@ -19983,6 +27806,162 @@ index 00000000000..9eefb82c226 + *instructions = normaliser.instructions; + return VKD3D_OK; +} ++ ++struct flat_constant_def ++{ ++ enum vkd3d_shader_d3dbc_constant_register set; ++ 
uint32_t index; ++ uint32_t value[4]; ++}; ++ ++struct flat_constants_normaliser ++{ ++ struct vkd3d_shader_parser *parser; ++ struct flat_constant_def *defs; ++ size_t def_count, defs_capacity; ++}; ++ ++static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg, ++ enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index) ++{ ++ static const struct ++ { ++ enum vkd3d_shader_register_type type; ++ enum vkd3d_shader_d3dbc_constant_register set; ++ uint32_t offset; ++ } ++ regs[] = ++ { ++ {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0}, ++ {VKD3DSPR_CONST2, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048}, ++ {VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096}, ++ {VKD3DSPR_CONST4, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144}, ++ {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0}, ++ {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0}, ++ }; ++ ++ unsigned int i; ++ ++ for (i = 0; i < ARRAY_SIZE(regs); ++i) ++ { ++ if (reg->type == regs[i].type) ++ { ++ if (reg->idx[0].rel_addr) ++ { ++ FIXME("Unhandled relative address.\n"); ++ return false; ++ } ++ ++ *set = regs[i].set; ++ *index = regs[i].offset + reg->idx[0].offset; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_param *param, ++ const struct flat_constants_normaliser *normaliser) ++{ ++ enum vkd3d_shader_d3dbc_constant_register set; ++ uint32_t index; ++ size_t i, j; ++ ++ if (!get_flat_constant_register_type(&param->reg, &set, &index)) ++ return; ++ ++ for (i = 0; i < normaliser->def_count; ++i) ++ { ++ if (normaliser->defs[i].set == set && normaliser->defs[i].index == index) ++ { ++ param->reg.type = VKD3DSPR_IMMCONST; ++ param->reg.idx_count = 0; ++ param->reg.immconst_type = VKD3D_IMMCONST_VEC4; ++ for (j = 0; j < 4; ++j) ++ param->reg.u.immconst_uint[j] = normaliser->defs[i].value[j]; ++ return; ++ } ++ } ++ ++ param->reg.type = 
VKD3DSPR_CONSTBUFFER; ++ param->reg.idx[0].offset = set; /* register ID */ ++ param->reg.idx[1].offset = set; /* register index */ ++ param->reg.idx[2].offset = index; /* buffer index */ ++ param->reg.idx_count = 3; ++} ++ ++static enum vkd3d_result instruction_array_normalise_flat_constants(struct vkd3d_shader_parser *parser) ++{ ++ struct flat_constants_normaliser normaliser = {.parser = parser}; ++ unsigned int i, j; ++ ++ for (i = 0; i < parser->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &parser->instructions.elements[i]; ++ ++ if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) ++ { ++ struct flat_constant_def *def; ++ ++ if (!vkd3d_array_reserve((void **)&normaliser.defs, &normaliser.defs_capacity, ++ normaliser.def_count + 1, sizeof(*normaliser.defs))) ++ { ++ vkd3d_free(normaliser.defs); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ def = &normaliser.defs[normaliser.def_count++]; ++ ++ get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index); ++ for (j = 0; j < 4; ++j) ++ def->value[j] = ins->src[0].reg.u.immconst_uint[j]; ++ ++ vkd3d_shader_instruction_make_nop(ins); ++ } ++ else ++ { ++ for (j = 0; j < ins->src_count; ++j) ++ shader_register_normalise_flat_constants((struct vkd3d_shader_src_param *)&ins->src[j], &normaliser); ++ } ++ } ++ ++ vkd3d_free(normaliser.defs); ++ return VKD3D_OK; ++} ++ ++enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, ++ const struct vkd3d_shader_compile_info *compile_info) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &parser->instructions; ++ enum vkd3d_result result = VKD3D_OK; ++ ++ if (parser->shader_desc.is_dxil) ++ return result; ++ ++ if (parser->shader_version.type != VKD3D_SHADER_TYPE_PIXEL ++ && (result = remap_output_signature(parser, compile_info)) < 0) ++ return result; ++ ++ if (parser->shader_version.type == VKD3D_SHADER_TYPE_HULL 
++ && (result = instruction_array_flatten_hull_shader_phases(instructions)) >= 0) ++ { ++ result = instruction_array_normalise_hull_shader_control_point_io(instructions, ++ &parser->shader_desc.input_signature); ++ } ++ if (result >= 0) ++ result = instruction_array_normalise_io_registers(instructions, parser->shader_version.type, ++ &parser->shader_desc.input_signature, &parser->shader_desc.output_signature, ++ &parser->shader_desc.patch_constant_signature); ++ ++ if (result >= 0) ++ result = instruction_array_normalise_flat_constants(parser); ++ ++ if (result >= 0 && TRACE_ON()) ++ vkd3d_shader_trace(instructions, &parser->shader_version); ++ ++ return result; ++} diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h index 4860cf5f90e..e1cb75e177c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.h @@ -19997,10 +27976,24 @@ index 4860cf5f90e..e1cb75e177c 100644 struct preproc_if_state { diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index bb5a6b61de1..94079696280 100644 +index bb5a6b61de1..6fb61eff6c3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l -@@ -41,6 +41,7 @@ static void update_location(struct preproc_ctx *ctx); +@@ -30,6 +30,13 @@ + + #define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner) + ++static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) ++{ ++ if (!ctx->expansion_count) ++ return NULL; ++ return ctx->expansion_stack[ctx->expansion_count - 1].macro; ++} ++ + static void update_location(struct preproc_ctx *ctx); + + #define YY_USER_ACTION update_location(yyget_extra(yyscanner)); +@@ -41,6 +48,7 @@ static void update_location(struct preproc_ctx *ctx); %option bison-locations %option extra-type="struct preproc_ctx *" %option never-interactive @@ -20008,7 +28001,7 @@ index bb5a6b61de1..94079696280 100644 %option noinput %option nounput 
%option noyy_top_state -@@ -75,6 +76,7 @@ INT_SUFFIX [uUlL]{0,2} +@@ -75,6 +83,7 @@ INT_SUFFIX [uUlL]{0,2} "*/" {yy_pop_state(yyscanner);} <> {yy_pop_state(yyscanner);} . {} @@ -20016,7 +28009,29 @@ index bb5a6b61de1..94079696280 100644 (\\{NEWLINE}|[^\n])* {return T_STRING;} -@@ -176,9 +178,9 @@ INT_SUFFIX [uUlL]{0,2} +@@ -123,7 +132,20 @@ INT_SUFFIX [uUlL]{0,2} + const char *p; + + if (!ctx->last_was_newline) +- return T_HASHSTRING; ++ { ++ struct preproc_macro *macro; ++ ++ /* Stringification is only done for function-like macro bodies. ++ * Anywhere else, we need to parse it as two separate tokens. ++ * We could use a state for this, but yyless() is easier and cheap. ++ */ ++ ++ if ((macro = preproc_get_top_macro(ctx)) && macro->arg_count) ++ return T_HASHSTRING; ++ ++ yyless(1); ++ return T_TEXT; ++ } + + for (p = yytext + 1; strchr(" \t", *p); ++p) + ; +@@ -176,9 +198,9 @@ INT_SUFFIX [uUlL]{0,2} return T_NEWLINE; } @@ -20028,8 +28043,180 @@ index bb5a6b61de1..94079696280 100644 %% +@@ -217,13 +239,6 @@ static bool preproc_is_writing(struct preproc_ctx *ctx) + return file->if_stack[file->if_count - 1].current_true; + } + +-static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) +-{ +- if (!ctx->expansion_count) +- return NULL; +- return ctx->expansion_stack[ctx->expansion_count - 1].macro; +-} +- + /* Concatenation is not done for object-like macros, but is done for both + * function-like macro bodies and their arguments. 
*/ + static bool should_concat(struct preproc_ctx *ctx) +@@ -332,6 +347,43 @@ static bool preproc_push_expansion(struct preproc_ctx *ctx, + return true; + } + ++static void preproc_stringify(struct preproc_ctx *ctx, struct vkd3d_string_buffer *buffer, const char *text) ++{ ++ const struct preproc_text *expansion; ++ const char *p = text + 1; ++ unsigned int i; ++ ++ while (*p == ' ' || *p == '\t') ++ ++p; ++ ++ vkd3d_string_buffer_printf(buffer, "\""); ++ if ((expansion = find_arg_expansion(ctx, p))) ++ { ++ size_t len = expansion->text.content_size; ++ size_t start = 0; ++ ++ while (len && strchr(" \t\r\n", expansion->text.buffer[len - 1])) ++ --len; ++ ++ while (start < len && strchr(" \t\r\n", expansion->text.buffer[start])) ++ ++start; ++ ++ for (i = start; i < len; ++i) ++ { ++ char c = expansion->text.buffer[i]; ++ ++ if (c == '\\' || c == '"') ++ vkd3d_string_buffer_printf(buffer, "\\"); ++ vkd3d_string_buffer_printf(buffer, "%c", c); ++ } ++ } ++ else ++ { ++ vkd3d_string_buffer_printf(buffer, "%s", p); ++ } ++ vkd3d_string_buffer_printf(buffer, "\""); ++} ++ + int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + { + struct preproc_ctx *ctx = yyget_extra(scanner); +@@ -439,9 +491,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + switch (func_state->state) + { + case STATE_NONE: +- { +- struct preproc_macro *macro; +- + if (token == T_CONCAT && should_concat(ctx)) + { + while (ctx->buffer.content_size +@@ -450,37 +499,17 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + break; + } + +- /* Stringification, however, is only done for function-like +- * macro bodies. 
*/ +- if (token == T_HASHSTRING && (macro = preproc_get_top_macro(ctx)) && macro->arg_count) ++ if (token == T_HASHSTRING) + { +- const struct preproc_text *expansion; +- const char *p = text + 1; +- unsigned int i; ++ struct vkd3d_string_buffer buffer; + + if (ctx->current_directive) + return return_token(token, lval, text); + +- while (*p == ' ' || *p == '\t') +- ++p; +- +- vkd3d_string_buffer_printf(&ctx->buffer, "\""); +- if ((expansion = find_arg_expansion(ctx, p))) +- { +- for (i = 0; i < expansion->text.content_size; ++i) +- { +- char c = expansion->text.buffer[i]; +- +- if (c == '\\' || c == '"') +- vkd3d_string_buffer_printf(&ctx->buffer, "\\"); +- vkd3d_string_buffer_printf(&ctx->buffer, "%c", c); +- } +- } +- else +- { +- vkd3d_string_buffer_printf(&ctx->buffer, "%s", p); +- } +- vkd3d_string_buffer_printf(&ctx->buffer, "\""); ++ vkd3d_string_buffer_init(&buffer); ++ preproc_stringify(ctx, &buffer, text); ++ vkd3d_string_buffer_printf(&ctx->buffer, "%s", buffer.buffer); ++ vkd3d_string_buffer_cleanup(&buffer); + break; + } + +@@ -584,7 +613,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + else + vkd3d_string_buffer_printf(&ctx->buffer, "%s ", text); + break; +- } + + case STATE_IDENTIFIER: + if (token == '(') +@@ -626,6 +654,41 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + + switch (token) + { ++ /* Most text gets left alone (e.g. if it contains macros, ++ * the macros should be evaluated later). ++ * Arguments are a special case, and are replaced with ++ * their values immediately. */ ++ case T_IDENTIFIER: ++ case T_IDENTIFIER_PAREN: ++ { ++ const struct preproc_text *expansion; ++ ++ if ((expansion = find_arg_expansion(ctx, text))) ++ { ++ preproc_push_expansion(ctx, expansion, NULL); ++ continue; ++ } ++ ++ if (current_arg) ++ preproc_text_add(current_arg, text); ++ break; ++ } ++ ++ /* Stringification is another special case. Unsurprisingly, ++ * we need to stringify if this is an argument. 
More ++ * surprisingly, we need to stringify even if it's not. */ ++ case T_HASHSTRING: ++ { ++ struct vkd3d_string_buffer buffer; ++ ++ vkd3d_string_buffer_init(&buffer); ++ preproc_stringify(ctx, &buffer, text); ++ if (current_arg) ++ preproc_text_add(current_arg, buffer.buffer); ++ vkd3d_string_buffer_cleanup(&buffer); ++ break; ++ } ++ + case T_NEWLINE: + if (current_arg) + preproc_text_add(current_arg, " "); +@@ -684,6 +747,9 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + if (current_arg) + preproc_text_add(current_arg, text); + } ++ ++ if (current_arg) ++ preproc_text_add(current_arg, " "); + break; + } + } diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 53e13735937..3542b5fac51 100644 +index 53e13735937..8285b56a17c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -18,7 +18,7 @@ @@ -20050,23 +28237,69 @@ index 53e13735937..3542b5fac51 100644 unsigned int index) { switch (sysval) -@@ -199,14 +199,9 @@ static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enu +@@ -199,14 +199,19 @@ static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enu } } -static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_shader_sysval_semantic sysval) --{ ++static inline bool register_is_undef(const struct vkd3d_shader_register *reg) + { - return vkd3d_siv_from_sysval_indexed(sysval, 0); --} -- ++ return reg->type == VKD3DSPR_UNDEF; ++} ++ ++static inline bool register_is_constant_or_undef(const struct vkd3d_shader_register *reg) ++{ ++ return register_is_constant(reg) || register_is_undef(reg); + } + #define VKD3D_SPIRV_VERSION 0x00010000 #define VKD3D_SPIRV_GENERATOR_ID 18 -#define VKD3D_SPIRV_GENERATOR_VERSION 7 -+#define VKD3D_SPIRV_GENERATOR_VERSION 8 ++#define VKD3D_SPIRV_GENERATOR_VERSION 9 #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) 
struct vkd3d_spirv_stream -@@ -1967,11 +1962,9 @@ struct vkd3d_symbol_register_data +@@ -1751,6 +1756,38 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, + } + } + ++static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder *builder, ++ enum vkd3d_data_type data_type, unsigned int component_count) ++{ ++ uint32_t scalar_id; ++ ++ if (component_count == 1) ++ { ++ switch (data_type) ++ { ++ case VKD3D_DATA_FLOAT: ++ case VKD3D_DATA_SNORM: ++ case VKD3D_DATA_UNORM: ++ return vkd3d_spirv_get_op_type_float(builder, 32); ++ break; ++ case VKD3D_DATA_INT: ++ case VKD3D_DATA_UINT: ++ return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); ++ break; ++ case VKD3D_DATA_DOUBLE: ++ return vkd3d_spirv_get_op_type_float(builder, 64); ++ default: ++ FIXME("Unhandled data type %#x.\n", data_type); ++ return 0; ++ } ++ } ++ else ++ { ++ scalar_id = vkd3d_spirv_get_type_id_for_data_type(builder, data_type, 1); ++ return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); ++ } ++} ++ + static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point) + { + vkd3d_spirv_stream_init(&builder->debug_stream); +@@ -1967,11 +2004,9 @@ struct vkd3d_symbol_register_data uint32_t member_idx; enum vkd3d_shader_component_type component_type; unsigned int write_mask; @@ -20078,7 +28311,7 @@ index 53e13735937..3542b5fac51 100644 }; struct vkd3d_symbol_resource_data -@@ -2064,10 +2057,14 @@ static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, +@@ -2064,10 +2099,14 @@ static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, symbol->type = VKD3D_SYMBOL_REGISTER; memset(&symbol->key, 0, sizeof(symbol->key)); symbol->key.reg.type = reg->type; @@ -20096,7 +28329,7 @@ index 53e13735937..3542b5fac51 100644 } static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, -@@ -2080,11 +2077,9 @@ static void vkd3d_symbol_set_register_info(struct 
vkd3d_symbol *symbol, +@@ -2080,11 +2119,9 @@ static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, symbol->info.reg.member_idx = 0; symbol->info.reg.component_type = component_type; symbol->info.reg.write_mask = write_mask; @@ -20108,7 +28341,7 @@ index 53e13735937..3542b5fac51 100644 } static void vkd3d_symbol_make_resource(struct vkd3d_symbol *symbol, -@@ -2197,11 +2192,7 @@ struct vkd3d_push_constant_buffer_binding +@@ -2197,11 +2234,7 @@ struct vkd3d_push_constant_buffer_binding struct vkd3d_shader_phase { @@ -20120,7 +28353,7 @@ index 53e13735937..3542b5fac51 100644 size_t function_location; }; -@@ -2253,10 +2244,11 @@ struct spirv_compiler +@@ -2253,10 +2286,11 @@ struct spirv_compiler struct vkd3d_push_constant_buffer_binding *push_constants; const struct vkd3d_shader_spirv_target_info *spirv_target_info; @@ -20135,7 +28368,13 @@ index 53e13735937..3542b5fac51 100644 const struct vkd3d_shader_transform_feedback_info *xfb_info; struct vkd3d_shader_output_info { -@@ -2276,9 +2268,10 @@ struct spirv_compiler +@@ -2271,14 +2305,15 @@ struct spirv_compiler + + uint32_t binding_idx; + +- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; ++ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; + unsigned int input_control_point_count; unsigned int output_control_point_count; bool use_vocp; @@ -20149,7 +28388,7 @@ index 53e13735937..3542b5fac51 100644 uint32_t current_spec_constant_id; unsigned int spec_constant_count; -@@ -2290,9 +2283,19 @@ struct spirv_compiler +@@ -2290,9 +2325,19 @@ struct spirv_compiler struct vkd3d_string_buffer_cache string_buffers; }; @@ -20171,12 +28410,13 @@ index 53e13735937..3542b5fac51 100644 } static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler); -@@ -2304,13 +2307,37 @@ static const char *spirv_compiler_get_entry_point_name(const struct spirv_compil +@@ -2304,13 +2349,37 @@ static const char *spirv_compiler_get_entry_point_name(const struct 
spirv_compil return info && info->entry_point ? info->entry_point : "main"; } -struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, - const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, +- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, +static void spirv_compiler_destroy(struct spirv_compiler *compiler) +{ + vkd3d_free(compiler->control_flow_info); @@ -20203,7 +28443,7 @@ index 53e13735937..3542b5fac51 100644 + +static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, + struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, ++ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) { - const struct vkd3d_shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; @@ -20213,23 +28453,18 @@ index 53e13735937..3542b5fac51 100644 const struct vkd3d_shader_interface_info *shader_interface; const struct vkd3d_shader_descriptor_offset_info *offset_info; const struct vkd3d_shader_spirv_target_info *target_info; -@@ -2402,9 +2429,12 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * +@@ -2402,10 +2471,6 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * compiler->shader_type = shader_version->type; - compiler->input_signature = &shader_desc->input_signature; - compiler->output_signature = &shader_desc->output_signature; - compiler->patch_constant_signature = &shader_desc->patch_constant_signature; -+ compiler->input_signature = shader_desc->input_signature; -+ compiler->output_signature = shader_desc->output_signature; -+ compiler->patch_constant_signature = shader_desc->patch_constant_signature; -+ 
memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); -+ memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); -+ memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); - +- if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) { -@@ -2437,6 +2467,8 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * + compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); +@@ -2437,6 +2502,8 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * compiler->scan_descriptor_info = scan_descriptor_info; @@ -20238,7 +28473,104 @@ index 53e13735937..3542b5fac51 100644 vkd3d_string_buffer_cache_init(&compiler->string_buffers); spirv_compiler_emit_initial_declarations(compiler); -@@ -2857,7 +2889,7 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s +@@ -2504,13 +2571,13 @@ static bool spirv_compiler_check_shader_visibility(const struct spirv_compiler * + } + + static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_constant_buffer( +- const struct spirv_compiler *compiler, const struct vkd3d_shader_constant_buffer *cb) ++ const struct spirv_compiler *compiler, const struct vkd3d_shader_register_range *range) + { +- unsigned int register_space = cb->range.space; +- unsigned int reg_idx = cb->range.first; ++ unsigned int register_space = range->space; ++ unsigned int reg_idx = range->first; + unsigned int i; + +- if (cb->range.first != cb->range.last) ++ if (range->first != range->last) + return NULL; + + for (i = 0; i < compiler->shader_interface.push_constant_buffer_count; ++i) +@@ -2527,8 +2594,8 @@ static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_const + return NULL; + } + +-static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *compiler, +- const struct vkd3d_shader_resource 
*resource, const struct vkd3d_shader_sampler *sampler) ++static bool spirv_compiler_has_combined_sampler_for_resource(const struct spirv_compiler *compiler, ++ const struct vkd3d_shader_register_range *range) + { + const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; + const struct vkd3d_shader_combined_resource_sampler *combined_sampler; +@@ -2537,10 +2604,35 @@ static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *com + if (!shader_interface->combined_sampler_count) + return false; + +- if (resource && (resource->reg.reg.type == VKD3DSPR_UAV || resource->range.last != resource->range.first)) ++ if (range->last != range->first) ++ return false; ++ ++ for (i = 0; i < shader_interface->combined_sampler_count; ++i) ++ { ++ combined_sampler = &shader_interface->combined_samplers[i]; ++ ++ if (!spirv_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) ++ continue; ++ ++ if ((combined_sampler->resource_space == range->space ++ && combined_sampler->resource_index == range->first)) ++ return true; ++ } ++ ++ return false; ++} ++ ++static bool spirv_compiler_has_combined_sampler_for_sampler(const struct spirv_compiler *compiler, ++ const struct vkd3d_shader_register_range *range) ++{ ++ const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; ++ const struct vkd3d_shader_combined_resource_sampler *combined_sampler; ++ unsigned int i; ++ ++ if (!shader_interface->combined_sampler_count) + return false; + +- if (sampler && sampler->range.first != sampler->range.last) ++ if (range->last != range->first) + return false; + + for (i = 0; i < shader_interface->combined_sampler_count; ++i) +@@ -2550,10 +2642,8 @@ static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *com + if (!spirv_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) + continue; + +- if ((!resource || (combined_sampler->resource_space == 
resource->range.space +- && combined_sampler->resource_index == resource->range.first)) +- && (!sampler || (combined_sampler->sampler_space == sampler->range.space +- && combined_sampler->sampler_index == sampler->range.first))) ++ if (combined_sampler->sampler_space == range->space ++ && combined_sampler->sampler_index == range->first) + return true; + } + +@@ -2571,6 +2661,16 @@ static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_error(struct spirv_compiler * + compiler->failed = true; + } + ++static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_warning(struct spirv_compiler *compiler, ++ enum vkd3d_shader_error error, const char *format, ...) ++{ ++ va_list args; ++ ++ va_start(args, format); ++ vkd3d_shader_vwarning(compiler->message_context, &compiler->location, error, format, args); ++ va_end(args); ++} ++ + static struct vkd3d_string_buffer *vkd3d_shader_register_range_string(struct spirv_compiler *compiler, + const struct vkd3d_shader_register_range *range) + { +@@ -2857,7 +2957,7 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s { unsigned int idx; @@ -20247,7 +28579,7 @@ index 53e13735937..3542b5fac51 100644 switch (reg->type) { case VKD3DSPR_RESOURCE: -@@ -2887,12 +2919,6 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s +@@ -2887,12 +2987,6 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s case VKD3DSPR_DEPTHOUTLE: snprintf(buffer, buffer_size, "oDepth"); break; @@ -20260,7 +28592,7 @@ index 53e13735937..3542b5fac51 100644 case VKD3DSPR_GSINSTID: snprintf(buffer, buffer_size, "vGSInstanceID"); break; -@@ -2965,18 +2991,26 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, +@@ -2965,18 +3059,26 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler, struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, @@ 
-20291,7 +28623,7 @@ index 53e13735937..3542b5fac51 100644 ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); } -@@ -3169,7 +3203,6 @@ struct vkd3d_shader_register_info +@@ -3169,7 +3271,6 @@ struct vkd3d_shader_register_info unsigned int structure_stride; unsigned int binding_base_idx; bool is_aggregate; @@ -20299,7 +28631,23 @@ index 53e13735937..3542b5fac51 100644 }; static bool spirv_compiler_get_register_info(const struct spirv_compiler *compiler, -@@ -3192,7 +3225,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil +@@ -3178,13 +3279,13 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil + struct vkd3d_symbol reg_symbol, *symbol; + struct rb_entry *entry; + +- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); ++ assert(!register_is_constant_or_undef(reg)); + + if (reg->type == VKD3DSPR_TEMP) + { + assert(reg->idx[0].offset < compiler->temp_count); + register_info->id = compiler->temp_id + reg->idx[0].offset; +- register_info->storage_class = SpvStorageClassFunction; ++ register_info->storage_class = SpvStorageClassPrivate; + register_info->descriptor_array = NULL; + register_info->member_idx = 0; + register_info->component_type = VKD3D_SHADER_COMPONENT_FLOAT; +@@ -3192,7 +3293,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil register_info->structure_stride = 0; register_info->binding_base_idx = 0; register_info->is_aggregate = false; @@ -20307,7 +28655,7 @@ index 53e13735937..3542b5fac51 100644 return true; } -@@ -3214,7 +3246,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil +@@ -3214,7 +3314,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil register_info->structure_stride = symbol->info.reg.structure_stride; register_info->binding_base_idx = 
symbol->info.reg.binding_base_idx; register_info->is_aggregate = symbol->info.reg.is_aggregate; @@ -20315,7 +28663,7 @@ index 53e13735937..3542b5fac51 100644 return true; } -@@ -3344,41 +3375,22 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp +@@ -3344,41 +3443,22 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp } else if (register_info->is_aggregate) { @@ -20367,7 +28715,66 @@ index 53e13735937..3542b5fac51 100644 indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, ®->idx[0]); } -@@ -4249,35 +4261,12 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp +@@ -3541,6 +3621,19 @@ static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi + vkd3d_component_type_from_data_type(reg->data_type), component_count, values); + } + ++static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_register *reg, DWORD write_mask) ++{ ++ unsigned int component_count = vkd3d_write_mask_component_count(write_mask); ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ uint32_t type_id; ++ ++ assert(reg->type == VKD3DSPR_UNDEF); ++ ++ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); ++ return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); ++} ++ + static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, + const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask, + const struct vkd3d_shader_register_info *reg_info) +@@ -3551,7 +3644,7 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, + enum vkd3d_shader_component_type component_type; + unsigned int skipped_component_mask; + +- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); ++ assert(!register_is_constant_or_undef(reg)); + 
assert(vkd3d_write_mask_component_count(write_mask) == 1); + + component_idx = vkd3d_write_mask_get_component_idx(write_mask); +@@ -3603,6 +3696,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, + return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); + else if (reg->type == VKD3DSPR_IMMCONST64) + return spirv_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); ++ else if (reg->type == VKD3DSPR_UNDEF) ++ return spirv_compiler_emit_load_undef(compiler, reg, write_mask); + + component_count = vkd3d_write_mask_component_count(write_mask); + component_type = vkd3d_component_type_from_data_type(reg->data_type); +@@ -3815,7 +3910,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, + unsigned int src_write_mask = write_mask; + uint32_t type_id; + +- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); ++ assert(!register_is_constant_or_undef(reg)); + + if (!spirv_compiler_get_register_info(compiler, reg, ®_info)) + return; +@@ -3986,6 +4081,11 @@ static void spirv_compiler_emit_interpolation_decorations(struct spirv_compiler + vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); + break; ++ case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: ++ vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationNoPerspective, NULL, 0); ++ vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); ++ vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); ++ break; + default: + FIXME("Unhandled interpolation mode %#x.\n", mode); + break; +@@ -4249,35 +4349,12 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp if ((builtin = get_spirv_builtin_for_register(reg_type))) return builtin; @@ -20405,7 +28812,7 @@ index 53e13735937..3542b5fac51 100644 static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler 
*compiler) { struct vkd3d_shader_register r; -@@ -4288,6 +4277,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler +@@ -4288,6 +4365,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler r.type = VKD3DSPR_OUTPOINTID; r.idx[0].offset = ~0u; r.idx[1].offset = ~0u; @@ -20413,7 +28820,7 @@ index 53e13735937..3542b5fac51 100644 return spirv_compiler_get_register_id(compiler, &r); } -@@ -4302,7 +4292,7 @@ static uint32_t spirv_compiler_emit_load_invocation_id(struct spirv_compiler *co +@@ -4302,7 +4380,7 @@ static uint32_t spirv_compiler_emit_load_invocation_id(struct spirv_compiler *co } static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compiler, @@ -20422,7 +28829,7 @@ index 53e13735937..3542b5fac51 100644 { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const char *name; -@@ -4310,7 +4300,7 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile +@@ -4310,7 +4388,7 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile if (!suffix) suffix = ""; @@ -20431,7 +28838,7 @@ index 53e13735937..3542b5fac51 100644 { case VKD3DSIH_HS_CONTROL_POINT_PHASE: name = "control"; -@@ -4322,62 +4312,23 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile +@@ -4322,62 +4400,23 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile name = "join"; break; default: @@ -20499,7 +28906,7 @@ index 53e13735937..3542b5fac51 100644 { const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; const struct vkd3d_shader_transform_feedback_element *xfb_element; -@@ -4436,17 +4387,21 @@ static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler, +@@ -4436,17 +4475,21 @@ static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler, vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationOffset, offset); } @@ -20527,7 
+28934,7 @@ index 53e13735937..3542b5fac51 100644 vkd3d_spirv_add_iface_variable(builder, id); spirv_compiler_decorate_builtin(compiler, id, builtin->spirv_builtin); -@@ -4458,54 +4413,45 @@ static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *comp +@@ -4458,54 +4501,45 @@ static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *comp return id; } @@ -20611,7 +29018,7 @@ index 53e13735937..3542b5fac51 100644 } static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, -@@ -4513,50 +4459,35 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, +@@ -4513,50 +4547,35 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, enum vkd3d_shader_interpolation_mode interpolation_mode) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -20674,7 +29081,7 @@ index 53e13735937..3542b5fac51 100644 sysval = vkd3d_siv_from_sysval(signature_element->sysval_semantic); builtin = get_spirv_builtin_for_sysval(compiler, sysval); -@@ -4576,12 +4507,16 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, +@@ -4576,12 +4595,16 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, component_idx = vkd3d_write_mask_get_component_idx(signature_element->mask); } @@ -20694,7 +29101,7 @@ index 53e13735937..3542b5fac51 100644 storage_class = SpvStorageClassInput; -@@ -4589,111 +4524,68 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, +@@ -4589,111 +4612,68 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) { @@ -20786,7 +29193,7 @@ index 53e13735937..3542b5fac51 100644 { - var_id = input_id; - if (use_private_var) -+ unsigned int location = signature_element->register_index; ++ unsigned int location = signature_element->target_location; + + input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, 
+ storage_class, component_type, input_component_count, array_sizes, 2); @@ -20847,7 +29254,7 @@ index 53e13735937..3542b5fac51 100644 { ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); index = spirv_compiler_get_constant_uint(compiler, i); -@@ -4708,7 +4600,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, +@@ -4708,7 +4688,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); index = spirv_compiler_get_constant_uint(compiler, builtin->member_idx); val_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, ptr_type_id, input_id, index); @@ -20856,7 +29263,7 @@ index 53e13735937..3542b5fac51 100644 } val_id = vkd3d_spirv_build_op_load(builder, type_id, val_id, SpvMemoryAccessMaskNone); -@@ -4743,9 +4635,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, +@@ -4743,9 +4723,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, uint32_t write_mask; uint32_t input_id; @@ -20868,7 +29275,7 @@ index 53e13735937..3542b5fac51 100644 if (!(builtin = get_spirv_builtin_for_register(reg->type))) { -@@ -4763,19 +4654,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, +@@ -4763,19 +4742,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); vkd3d_symbol_set_register_info(®_symbol, input_id, SpvStorageClassInput, builtin->component_type, write_mask); @@ -20889,7 +29296,7 @@ index 53e13735937..3542b5fac51 100644 switch (reg->type) { -@@ -4787,10 +4674,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil +@@ -4787,10 +4762,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil case VKD3DSPR_PRIMID: 
spirv_compiler_emit_input_register(compiler, dst); return; @@ -20900,7 +29307,7 @@ index 53e13735937..3542b5fac51 100644 case VKD3DSPR_OUTPOINTID: /* Emitted in spirv_compiler_emit_initial_declarations(). */ case VKD3DSPR_OUTCONTROLPOINT: /* See spirv_compiler_leave_shader_phase(). */ return; -@@ -4798,22 +4681,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil +@@ -4798,22 +4769,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil FIXME("Unhandled shader phase input register %#x.\n", reg->type); return; } @@ -20923,7 +29330,7 @@ index 53e13735937..3542b5fac51 100644 } static unsigned int get_shader_output_swizzle(const struct spirv_compiler *compiler, -@@ -4835,8 +4702,7 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) +@@ -4835,58 +4790,32 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && info && info->dual_source_blending; } @@ -20931,18 +29338,30 @@ index 53e13735937..3542b5fac51 100644 - uint32_t *mask) +static void calculate_clip_or_cull_distance_mask(const struct signature_element *e, uint32_t *mask) { ++ unsigned int write_mask; ++ if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) { -@@ -4847,38 +4713,10 @@ static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signa - *mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); + FIXME("Invalid semantic index %u for clip/cull distance.\n", e->semantic_index); + return; + } + +- *mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); ++ write_mask = e->mask >> vkd3d_write_mask_get_component_idx(e->mask); ++ *mask |= (write_mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); } -static uint32_t calculate_sysval_array_mask(struct spirv_compiler *compiler, - const struct vkd3d_shader_signature *signature, enum 
vkd3d_shader_input_sysval_semantic sysval) --{ ++/* Emits arrayed SPIR-V built-in variables. */ ++static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) + { - const struct vkd3d_shader_signature_element *e; - const struct vkd3d_spirv_builtin *sig_builtin; -- const struct vkd3d_spirv_builtin *builtin; ++ const struct shader_signature *output_signature = &compiler->output_signature; ++ uint32_t clip_distance_mask = 0, clip_distance_id = 0; ++ uint32_t cull_distance_mask = 0, cull_distance_id = 0; + const struct vkd3d_spirv_builtin *builtin; - uint32_t signature_idx, mask = 0; - - if (!(builtin = get_spirv_builtin_for_sysval(compiler, sysval))) @@ -20950,9 +29369,11 @@ index 53e13735937..3542b5fac51 100644 - FIXME("Unhandled sysval %#x.\n", sysval); - return 0; - } -- ++ unsigned int i, count; + - for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) -- { ++ for (i = 0; i < output_signature->element_count; ++i) + { - e = &signature->elements[signature_idx]; - - sig_builtin = get_spirv_builtin_for_sysval(compiler, @@ -20965,15 +29386,23 @@ index 53e13735937..3542b5fac51 100644 - return mask; -} - - /* Emits arrayed SPIR-V built-in variables. */ - static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) - { +-/* Emits arrayed SPIR-V built-in variables. 
*/ +-static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) +-{ - const struct vkd3d_shader_signature *output_signature = compiler->output_signature; -+ const struct shader_signature *output_signature = &compiler->output_signature; - uint32_t clip_distance_mask = 0, clip_distance_id = 0; - uint32_t cull_distance_mask = 0, cull_distance_id = 0; - const struct vkd3d_spirv_builtin *builtin; -@@ -4886,7 +4724,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * +- uint32_t clip_distance_mask = 0, clip_distance_id = 0; +- uint32_t cull_distance_mask = 0, cull_distance_id = 0; +- const struct vkd3d_spirv_builtin *builtin; +- unsigned int i, count; +- +- for (i = 0; i < output_signature->element_count; ++i) +- { +- const struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; ++ const struct signature_element *e = &output_signature->elements[i]; + + switch (e->sysval_semantic) + { +@@ -4921,7 +4850,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * for (i = 0; i < output_signature->element_count; ++i) { @@ -20982,16 +29411,7 @@ index 53e13735937..3542b5fac51 100644 switch (e->sysval_semantic) { -@@ -4921,7 +4759,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * - - for (i = 0; i < output_signature->element_count; ++i) - { -- const struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; -+ const struct signature_element *e = &output_signature->elements[i]; - - switch (e->sysval_semantic) - { -@@ -4953,9 +4791,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, +@@ -4953,9 +4882,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, uint32_t write_mask; uint32_t output_id; @@ -21003,7 +29423,7 @@ index 53e13735937..3542b5fac51 100644 if (!(builtin = get_spirv_builtin_for_register(reg->type))) { -@@ -4969,7 +4806,6 @@ static void 
spirv_compiler_emit_output_register(struct spirv_compiler *compiler, +@@ -4969,7 +4897,6 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); vkd3d_symbol_set_register_info(®_symbol, output_id, SpvStorageClassOutput, builtin->component_type, write_mask); @@ -21011,7 +29431,7 @@ index 53e13735937..3542b5fac51 100644 reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; spirv_compiler_put_symbol(compiler, ®_symbol); spirv_compiler_emit_register_execution_mode(compiler, reg); -@@ -4977,7 +4813,7 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, +@@ -4977,7 +4904,7 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, } static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler, @@ -21020,7 +29440,7 @@ index 53e13735937..3542b5fac51 100644 { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t *variable_id, id; -@@ -4993,7 +4829,7 @@ static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_c +@@ -4993,7 +4920,7 @@ static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_c return *variable_id; id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0); @@ -21029,7 +29449,7 @@ index 53e13735937..3542b5fac51 100644 vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); if (variable_id) -@@ -5005,44 +4841,34 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, +@@ -5005,50 +4932,39 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_input_sysval_semantic sysval) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -21058,13 +29478,13 @@ index 53e13735937..3542b5fac51 100644 - phase = 
spirv_compiler_get_current_shader_phase(compiler); - is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE); +- +- shader_signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; + is_patch_constant = is_in_fork_or_join_phase(compiler); -- shader_signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; +- array_size = is_control_point_phase(phase) ? compiler->output_control_point_count : 0; + shader_signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature; -- array_size = is_control_point_phase(phase) ? compiler->output_control_point_count : 0; -- - if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature, - &signature_idx, reg->idx[0].offset, dst->write_mask))) - { @@ -21085,7 +29505,13 @@ index 53e13735937..3542b5fac51 100644 if (builtin) { component_type = builtin->component_type; -@@ -5058,128 +4884,103 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + if (!builtin->spirv_array_size) + output_component_count = builtin->component_count; +- component_idx = 0; + } + else + { +@@ -5058,128 +4974,104 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, storage_class = SpvStorageClassOutput; if (get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE @@ -21096,14 +29522,11 @@ index 53e13735937..3542b5fac51 100644 + || needs_private_io_variable(builtin)) + { use_private_variable = true; -+ reg_write_mask = write_mask; -+ } - else -+ { - component_idx = vkd3d_write_mask_get_component_idx(write_mask); -+ reg_write_mask = write_mask >> component_idx; +- else +- component_idx = vkd3d_write_mask_get_component_idx(write_mask); + } ++ reg_write_mask = write_mask >> component_idx; vkd3d_symbol_make_register(®_symbol, reg); - if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) @@ 
-21164,12 +29587,12 @@ index 53e13735937..3542b5fac51 100644 - { - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, location); - } -- ++ id = spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, array_sizes, 2); + - if (component_idx) - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationComponent, component_idx); - } -+ id = spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, array_sizes, 2); - +- - if (is_patch_constant) - vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); - @@ -21181,12 +29604,18 @@ index 53e13735937..3542b5fac51 100644 } - - if (!symbol) ++ else if (signature_element->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) ++ { ++ storage_class = SpvStorageClassPrivate; ++ id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, ++ storage_class, component_type, output_component_count, array_sizes, 2); ++ } + else { - var_id = id; - if (use_private_variable) - storage_class = SpvStorageClassPrivate; -+ unsigned int location = signature_element->register_index; ++ unsigned int location = signature_element->target_location; + if (is_patch_constant) - var_id = compiler->hs.patch_constants_id; @@ -21204,12 +29633,12 @@ index 53e13735937..3542b5fac51 100644 + storage_class, component_type, output_component_count, array_sizes, 2); + vkd3d_spirv_add_iface_variable(builder, id); + -+ if (is_dual_source_blending(compiler) && signature_element->register_index < 2) ++ if (is_dual_source_blending(compiler) && location < 2) { - reg_symbol.info.reg.member_idx = spirv_compiler_get_invocation_id(compiler); - reg_symbol.info.reg.is_dynamically_indexed = true; + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); -+ vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, signature_element->register_index); ++ vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, location); } - else if (is_patch_constant) + else @@ -21278,7 
+29707,7 @@ index 53e13735937..3542b5fac51 100644 { enum vkd3d_shader_input_sysval_semantic sysval; const struct vkd3d_spirv_builtin *builtin; -@@ -5198,14 +4999,14 @@ static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *com +@@ -5198,14 +5090,14 @@ static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *com } static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compiler, @@ -21295,7 +29724,7 @@ index 53e13735937..3542b5fac51 100644 unsigned int i, index, array_idx; uint32_t output_id; -@@ -5224,6 +5025,9 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi +@@ -5224,6 +5116,9 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi use_mask |= element->used_mask; } } @@ -21305,7 +29734,7 @@ index 53e13735937..3542b5fac51 100644 write_mask &= dst_write_mask; if (!write_mask) -@@ -5294,22 +5098,19 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * +@@ -5294,22 +5189,19 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * uint32_t param_type_id[MAX_REG_OUTPUT + 1], param_id[MAX_REG_OUTPUT + 1] = {0}; uint32_t void_id, type_id, ptr_type_id, function_type_id, function_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -21331,7 +29760,7 @@ index 53e13735937..3542b5fac51 100644 function_id = compiler->epilogue_function_id; -@@ -5340,7 +5141,7 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * +@@ -5340,7 +5232,7 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * param_id[i] = vkd3d_spirv_build_op_load(builder, type_id, param_id[i], SpvMemoryAccessMaskNone); } @@ -21340,7 +29769,7 @@ index 53e13735937..3542b5fac51 100644 output_index_id = spirv_compiler_emit_load_invocation_id(compiler); for (i = 0; i < signature->element_count; ++i) -@@ -5348,14 +5149,12 @@ static void 
spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * +@@ -5348,14 +5240,12 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * if (!compiler->output_info[i].id) continue; @@ -21357,7 +29786,7 @@ index 53e13735937..3542b5fac51 100644 } vkd3d_spirv_build_op_return(&compiler->spirv_builder); -@@ -5375,28 +5174,11 @@ static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *comp +@@ -5375,28 +5265,11 @@ static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *comp dst.reg.type = VKD3DSPR_OUTPOINTID; dst.reg.idx[0].offset = ~0u; dst.reg.idx[1].offset = ~0u; @@ -21387,7 +29816,7 @@ index 53e13735937..3542b5fac51 100644 static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler) { const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; -@@ -5410,7 +5192,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp +@@ -5410,7 +5283,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp case VKD3D_SHADER_TYPE_HULL: vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationControl); spirv_compiler_emit_hull_shader_builtins(compiler); @@ -21395,7 +29824,7 @@ index 53e13735937..3542b5fac51 100644 break; case VKD3D_SHADER_TYPE_DOMAIN: vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation); -@@ -5439,8 +5220,7 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp +@@ -5439,8 +5311,7 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) { vkd3d_spirv_builder_begin_main_function(builder); @@ -21405,7 +29834,32 @@ index 53e13735937..3542b5fac51 100644 } } -@@ -5522,12 +5302,13 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil +@@ -5478,8 +5349,7 @@ static void spirv_compiler_emit_dcl_global_flags(struct 
spirv_compiler *compiler + WARN("Unhandled global flags %#x.\n", flags); + } + +-static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) ++static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t count) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + size_t function_location; +@@ -5490,11 +5360,11 @@ static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, + vkd3d_spirv_begin_function_stream_insertion(builder, function_location); + + assert(!compiler->temp_count); +- compiler->temp_count = instruction->declaration.count; ++ compiler->temp_count = count; + for (i = 0; i < compiler->temp_count; ++i) + { +- id = spirv_compiler_emit_variable(compiler, &builder->function_stream, +- SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); ++ id = spirv_compiler_emit_variable(compiler, &builder->global_stream, ++ SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + if (!i) + compiler->temp_id = id; + assert(id == compiler->temp_id + i); +@@ -5522,12 +5392,13 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil reg.type = VKD3DSPR_IDXTEMP; reg.idx[0].offset = temp->register_idx; reg.idx[1].offset = ~0u; @@ -21420,30 +29874,307 @@ index 53e13735937..3542b5fac51 100644 spirv_compiler_emit_register_debug_name(builder, id, ®); -@@ -6097,6 +5878,7 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) - { - const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; -+ enum vkd3d_shader_resource_type resource_type = semantic->resource_type; - uint32_t flags = instruction->flags; - - /* We don't distinguish between APPEND and COUNTER UAVs. 
*/ -@@ -6104,8 +5886,13 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, - if (flags) - FIXME("Unhandled UAV flags %#x.\n", flags); - -+ if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && semantic->sample_count == 1) -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && semantic->sample_count == 1) -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; -+ - spirv_compiler_emit_resource_declaration(compiler, &semantic->resource, -- semantic->resource_type, semantic->resource_data_type[0], 0, false); -+ resource_type, semantic->resource_data_type[0], 0, false); +@@ -5692,50 +5563,55 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * + return var_id; } - static void spirv_compiler_emit_dcl_resource_raw(struct spirv_compiler *compiler, -@@ -6185,10 +5972,9 @@ static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, +-static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) ++static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_register_range *range, unsigned int register_id, unsigned int size_in_bytes) + { +- const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; + const SpvStorageClass storage_class = SpvStorageClassUniform; +- const struct vkd3d_shader_register *reg = &cb->src.reg; + struct vkd3d_push_constant_buffer_binding *push_cb; + struct vkd3d_descriptor_variable_info var_info; + struct vkd3d_symbol reg_symbol; ++ unsigned int size; ++ ++ struct vkd3d_shader_register reg = ++ { ++ .type = VKD3DSPR_CONSTBUFFER, ++ .idx[0].offset = register_id, ++ .idx_count = 1, ++ }; + +- assert(!(instruction->flags & 
~VKD3DSI_INDEXED_DYNAMIC)); ++ size = size_in_bytes / (VKD3D_VEC4_SIZE * sizeof(uint32_t)); + +- if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, cb))) ++ if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, range))) + { + /* Push constant buffers are handled in + * spirv_compiler_emit_push_constant_buffers(). + */ +- unsigned int cb_size_in_bytes = cb->size * VKD3D_VEC4_SIZE * sizeof(uint32_t); +- push_cb->reg = *reg; +- push_cb->size = cb->size; +- if (cb_size_in_bytes > push_cb->pc.size) ++ push_cb->reg = reg; ++ push_cb->size = size; ++ if (size_in_bytes > push_cb->pc.size) + { + WARN("Constant buffer size %u exceeds push constant size %u.\n", +- cb_size_in_bytes, push_cb->pc.size); ++ size_in_bytes, push_cb->pc.size); + } + return; + } + + vec4_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); +- length_id = spirv_compiler_get_constant_uint(compiler, cb->size); ++ length_id = spirv_compiler_get_constant_uint(compiler, size); + array_type_id = vkd3d_spirv_build_op_type_array(builder, vec4_id, length_id); + vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, 16); + + struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1); + vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); + vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, SpvDecorationOffset, 0); +- vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", cb->size); ++ vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size); + + var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, +- reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); ++ ®, range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); + +- vkd3d_symbol_make_register(®_symbol, reg); ++ vkd3d_symbol_make_register(®_symbol, ®); + vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, + VKD3D_SHADER_COMPONENT_FLOAT, 
VKD3DSP_WRITEMASK_ALL); + reg_symbol.descriptor_array = var_info.array_symbol; +@@ -5776,29 +5652,34 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi + spirv_compiler_put_symbol(compiler, ®_symbol); + } + +-static void spirv_compiler_emit_dcl_sampler(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) ++static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_register_range *range, unsigned int register_id) + { +- const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; + const SpvStorageClass storage_class = SpvStorageClassUniformConstant; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_register *reg = &sampler->src.reg; + struct vkd3d_descriptor_variable_info var_info; + struct vkd3d_symbol reg_symbol; + uint32_t type_id, var_id; + +- vkd3d_symbol_make_sampler(®_symbol, reg); +- reg_symbol.info.sampler.range = sampler->range; ++ const struct vkd3d_shader_register reg = ++ { ++ .type = VKD3DSPR_SAMPLER, ++ .idx[0].offset = register_id, ++ .idx_count = 1, ++ }; ++ ++ vkd3d_symbol_make_sampler(®_symbol, ®); ++ reg_symbol.info.sampler.range = *range; + spirv_compiler_put_symbol(compiler, ®_symbol); + +- if (spirv_compiler_has_combined_sampler(compiler, NULL, sampler)) ++ if (spirv_compiler_has_combined_sampler_for_sampler(compiler, range)) + return; + + type_id = vkd3d_spirv_get_op_type_sampler(builder); +- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, +- &sampler->range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); ++ var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, ++ range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); + +- vkd3d_symbol_make_register(®_symbol, reg); ++ vkd3d_symbol_make_register(®_symbol, ®); + vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, + 
VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); + reg_symbol.descriptor_array = var_info.array_symbol; +@@ -5843,13 +5724,13 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty + } + } + +-static const struct vkd3d_shader_descriptor_info *spirv_compiler_get_descriptor_info( ++static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( + struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, + const struct vkd3d_shader_register_range *range) + { +- const struct vkd3d_shader_scan_descriptor_info *descriptor_info = compiler->scan_descriptor_info; ++ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; + unsigned int register_last = (range->last == ~0u) ? range->first : range->last; +- const struct vkd3d_shader_descriptor_info *d; ++ const struct vkd3d_shader_descriptor_info1 *d; + unsigned int i; + + for (i = 0; i < descriptor_info->descriptor_count; ++i) +@@ -5869,7 +5750,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler + bool raw_structured, uint32_t depth) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_descriptor_info *d; ++ const struct vkd3d_shader_descriptor_info1 *d; + bool uav_read, uav_atomics; + uint32_t sampled_type_id; + SpvImageFormat format; +@@ -5904,7 +5785,7 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi + const struct vkd3d_shader_combined_resource_sampler *current; + uint32_t image_type_id, type_id, ptr_type_id, var_id; + enum vkd3d_shader_binding_flag resource_type_flag; +- const struct vkd3d_shader_descriptor_info *d; ++ const struct vkd3d_shader_descriptor_info1 *d; + struct vkd3d_symbol symbol; + unsigned int i; + bool depth; +@@ -5980,20 +5861,30 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi + } + + static void 
spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, +- const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, +- enum vkd3d_data_type resource_data_type, unsigned int structure_stride, bool raw) ++ const struct vkd3d_shader_register_range *range, unsigned int register_id, ++ unsigned int sample_count, bool is_uav, enum vkd3d_shader_resource_type resource_type, ++ enum vkd3d_shader_resource_data_type resource_data_type, unsigned int structure_stride, bool raw) + { + struct vkd3d_descriptor_variable_info var_info, counter_var_info = {0}; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + SpvStorageClass storage_class = SpvStorageClassUniformConstant; + uint32_t counter_type_id, type_id, var_id, counter_var_id = 0; +- const struct vkd3d_shader_register *reg = &resource->reg.reg; + const struct vkd3d_spirv_resource_type *resource_type_info; + enum vkd3d_shader_component_type sampled_type; + struct vkd3d_symbol resource_symbol; +- bool is_uav; + +- is_uav = reg->type == VKD3DSPR_UAV; ++ struct vkd3d_shader_register reg = ++ { ++ .type = is_uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, ++ .idx[0].offset = register_id, ++ .idx_count = 1, ++ }; ++ ++ if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) ++ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; ++ else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && sample_count == 1) ++ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; ++ + if (!(resource_type_info = spirv_compiler_enable_resource_type(compiler, + resource_type, is_uav))) + { +@@ -6001,11 +5892,11 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + return; + } + +- sampled_type = vkd3d_component_type_from_data_type(resource_data_type); ++ sampled_type = vkd3d_component_type_from_resource_data_type(resource_data_type); + +- if (spirv_compiler_has_combined_sampler(compiler, resource, NULL)) ++ if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) + { +- spirv_compiler_emit_combined_sampler_declarations(compiler, reg, &resource->range, ++ spirv_compiler_emit_combined_sampler_declarations(compiler, ®, range, + resource_type, sampled_type, structure_stride, raw, resource_type_info); + return; + } +@@ -6028,19 +5919,18 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + } + else + { +- type_id = spirv_compiler_get_image_type_id(compiler, reg, &resource->range, ++ type_id = spirv_compiler_get_image_type_id(compiler, ®, range, + resource_type_info, sampled_type, structure_stride || raw, 0); + } + +- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, +- &resource->range, resource_type, false, &var_info); ++ var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, ++ range, resource_type, false, &var_info); + + if (is_uav) + { +- const struct vkd3d_shader_descriptor_info *d; ++ const struct vkd3d_shader_descriptor_info1 *d; + +- d = spirv_compiler_get_descriptor_info(compiler, +- 
VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &resource->range); ++ d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); + + if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) + vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); +@@ -6072,15 +5962,15 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + type_id = struct_id; + } + +- counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, +- &resource->range, resource_type, true, &counter_var_info); ++ counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, ++ type_id, ®, range, resource_type, true, &counter_var_info); + } + } + +- vkd3d_symbol_make_resource(&resource_symbol, reg); ++ vkd3d_symbol_make_resource(&resource_symbol, ®); + resource_symbol.id = var_id; + resource_symbol.descriptor_array = var_info.array_symbol; +- resource_symbol.info.resource.range = resource->range; ++ resource_symbol.info.resource.range = *range; + resource_symbol.info.resource.sampled_type = sampled_type; + resource_symbol.info.resource.type_id = type_id; + resource_symbol.info.resource.resource_type_info = resource_type_info; +@@ -6093,52 +5983,6 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + spirv_compiler_put_symbol(compiler, &resource_symbol); + } + +-static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; +- uint32_t flags = instruction->flags; +- +- /* We don't distinguish between APPEND and COUNTER UAVs. 
*/ +- flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; +- if (flags) +- FIXME("Unhandled UAV flags %#x.\n", flags); +- +- spirv_compiler_emit_resource_declaration(compiler, &semantic->resource, +- semantic->resource_type, semantic->resource_data_type[0], 0, false); +-} +- +-static void spirv_compiler_emit_dcl_resource_raw(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- const struct vkd3d_shader_raw_resource *resource = &instruction->declaration.raw_resource; +- uint32_t flags = instruction->flags; +- +- /* We don't distinguish between APPEND and COUNTER UAVs. */ +- flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; +- if (flags) +- FIXME("Unhandled UAV flags %#x.\n", flags); +- +- spirv_compiler_emit_resource_declaration(compiler, &resource->resource, +- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, 0, true); +-} +- +-static void spirv_compiler_emit_dcl_resource_structured(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- const struct vkd3d_shader_structured_resource *resource = &instruction->declaration.structured_resource; +- unsigned int stride = resource->byte_stride; +- uint32_t flags = instruction->flags; +- +- /* We don't distinguish between APPEND and COUNTER UAVs. 
*/ +- flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; +- if (flags) +- FIXME("Unhandled UAV flags %#x.\n", flags); +- +- spirv_compiler_emit_resource_declaration(compiler, &resource->resource, +- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, stride / 4, false); +-} +- + static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, + const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) + { +@@ -6185,10 +6029,9 @@ static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; @@ -21456,7 +30187,7 @@ index 53e13735937..3542b5fac51 100644 else if (vkd3d_shader_register_is_input(&dst->reg) || dst->reg.type == VKD3DSPR_PATCHCONST) spirv_compiler_emit_input(compiler, dst, VKD3D_SIV_NONE, VKD3DSIM_NONE); else -@@ -6224,7 +6010,8 @@ static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler, +@@ -6224,7 +6067,8 @@ static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler, { const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; @@ -21466,7 +30197,7 @@ index 53e13735937..3542b5fac51 100644 spirv_compiler_emit_output(compiler, dst, VKD3D_SIV_NONE); else spirv_compiler_emit_output_register(compiler, dst); -@@ -6242,64 +6029,6 @@ static void spirv_compiler_emit_dcl_output_siv(struct spirv_compiler *compiler, +@@ -6242,64 +6086,6 @@ static void spirv_compiler_emit_dcl_output_siv(struct spirv_compiler *compiler, spirv_compiler_emit_output(compiler, dst, sysval); } @@ -21531,7 +30262,7 @@ index 53e13735937..3542b5fac51 100644 static void spirv_compiler_emit_dcl_stream(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { -@@ -6495,157 +6224,83 @@ static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler +@@ -6495,157 +6281,80 @@ static void 
spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); } @@ -21548,14 +30279,14 @@ index 53e13735937..3542b5fac51 100644 - struct vkd3d_shader_register reg; - struct rb_entry *entry; - unsigned int i; -+ + +- vkd3d_spirv_build_op_function_end(builder); + if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase) + spirv_compiler_emit_default_control_point_phase(compiler); - vkd3d_spirv_build_op_function_end(builder); - - compiler->temp_id = 0; - compiler->temp_count = 0; +- compiler->temp_id = 0; +- compiler->temp_count = 0; ++ vkd3d_spirv_build_op_function_end(builder); - /* - * vocp inputs in fork and join shader phases are outputs of the control @@ -21635,31 +30366,31 @@ index 53e13735937..3542b5fac51 100644 - if ((previous_phase = spirv_compiler_get_current_shader_phase(compiler))) - spirv_compiler_leave_shader_phase(compiler, previous_phase); -+ assert(compiler->phase != instruction->handler_idx); - +- - if (!vkd3d_array_reserve((void **)&compiler->shader_phases, &compiler->shader_phases_size, - compiler->shader_phase_count + 1, sizeof(*compiler->shader_phases))) - return; - phase = &compiler->shader_phases[compiler->shader_phase_count]; -+ if (!is_in_default_phase(compiler)) -+ spirv_compiler_leave_shader_phase(compiler); - +- - phase->type = instruction->handler_idx; - phase->idx = compiler->shader_phase_count; - phase->instance_count = 0; - phase->function_id = 0; - phase->instance_id = 0; - phase->function_location = 0; -+ function_id = vkd3d_spirv_alloc_id(builder); ++ assert(compiler->phase != instruction->handler_idx); - ++compiler->shader_phase_count; -} -- ++ if (!is_in_default_phase(compiler)) ++ spirv_compiler_leave_shader_phase(compiler); + -static int spirv_compiler_emit_shader_phase_instance_count(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - struct vkd3d_shader_phase *phase = 
&compiler->shader_phases[compiler->shader_phase_count - 1]; -- ++ function_id = vkd3d_spirv_alloc_id(builder); + - if (!compiler->shader_phase_count - || (phase->type != VKD3DSIH_HS_FORK_PHASE && phase->type != VKD3DSIH_HS_JOIN_PHASE) - || phase->function_id) @@ -21721,7 +30452,7 @@ index 53e13735937..3542b5fac51 100644 uint32_t invocation_id; unsigned int i; -@@ -6657,6 +6312,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile +@@ -6657,6 +6366,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile invocation.reg.idx[0].offset = ~0u; invocation.reg.idx[1].offset = ~0u; invocation.reg.idx[2].offset = ~0u; @@ -21729,7 +30460,7 @@ index 53e13735937..3542b5fac51 100644 invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE; memset(&input_reg, 0, sizeof(input_reg)); -@@ -6664,37 +6320,42 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile +@@ -6664,37 +6374,42 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile input_reg.data_type = VKD3D_DATA_FLOAT; input_reg.idx[0].rel_addr = &invocation; input_reg.idx[2].offset = ~0u; @@ -21782,7 +30513,7 @@ index 53e13735937..3542b5fac51 100644 } static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, -@@ -6723,95 +6384,6 @@ static void spirv_compiler_emit_hull_shader_barrier(struct spirv_compiler *compi +@@ -6723,95 +6438,6 @@ static void spirv_compiler_emit_hull_shader_barrier(struct spirv_compiler *compi SpvScopeWorkgroup, SpvScopeInvocation, SpvMemorySemanticsMaskNone); } @@ -21878,7 +30609,7 @@ index 53e13735937..3542b5fac51 100644 static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -@@ -6854,46 +6426,21 @@ static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler +@@ -6854,46 +6480,21 @@ static void spirv_compiler_emit_shader_epilogue_invocation(struct 
spirv_compiler static void spirv_compiler_emit_hull_shader_main(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -21930,7 +30661,16 @@ index 53e13735937..3542b5fac51 100644 spirv_compiler_emit_shader_epilogue_invocation(compiler); vkd3d_spirv_build_op_return(builder); vkd3d_spirv_build_op_function_end(builder); -@@ -7575,10 +7122,10 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co +@@ -7093,7 +6694,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, + uint32_t components[VKD3D_VEC4_SIZE]; + unsigned int i, component_count; + +- if (src->reg.type == VKD3DSPR_IMMCONST || src->reg.type == VKD3DSPR_IMMCONST64 || dst->modifiers || src->modifiers) ++ if (register_is_constant_or_undef(&src->reg) || dst->modifiers || src->modifiers) + goto general_implementation; + + spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); +@@ -7575,10 +7176,10 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co static void spirv_compiler_emit_return(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { @@ -21943,7 +30683,22 @@ index 53e13735937..3542b5fac51 100644 spirv_compiler_emit_shader_epilogue_invocation(compiler); vkd3d_spirv_build_op_return(builder); -@@ -7972,12 +7519,15 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c +@@ -7851,7 +7452,13 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c + assert(compiler->control_flow_depth); + assert(cf_info->current_block == VKD3D_BLOCK_SWITCH); + +- assert(src->swizzle == VKD3D_SHADER_NO_SWIZZLE && src->reg.type == VKD3DSPR_IMMCONST); ++ if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) ++ { ++ WARN("Unexpected src swizzle %#x.\n", src->swizzle); ++ spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE, ++ "The swizzle for a switch case value is not scalar."); ++ } 
++ assert(src->reg.type == VKD3DSPR_IMMCONST); + value = *src->reg.u.immconst_uint; + + if (!vkd3d_array_reserve((void **)&cf_info->u.switch_.case_blocks, &cf_info->u.switch_.case_blocks_size, +@@ -7972,12 +7579,15 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c if (cf_info) cf_info->inside_block = false; @@ -21959,7 +30714,7 @@ index 53e13735937..3542b5fac51 100644 case VKD3DSIH_TEXKILL: spirv_compiler_emit_kill(compiler, instruction); break; -@@ -8256,7 +7806,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, +@@ -8256,7 +7866,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, image_operands[image_operand_count++] = spirv_compiler_emit_texel_offset(compiler, instruction, image.resource_type_info); } @@ -21968,7 +30723,7 @@ index 53e13735937..3542b5fac51 100644 { operands_mask |= SpvImageOperandsSampleMask; image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, -@@ -9521,58 +9071,6 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, +@@ -9521,58 +9131,6 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, vkd3d_spirv_build_op_end_primitive(builder); } @@ -22027,7 +30782,7 @@ index 53e13735937..3542b5fac51 100644 /* This function is called after declarations are processed. 
*/ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) { -@@ -9581,8 +9079,6 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) +@@ -9581,8 +9139,6 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) if (compiler->xfb_info && compiler->xfb_info->element_count && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) spirv_compiler_emit_point_size(compiler); @@ -22036,7 +30791,41 @@ index 53e13735937..3542b5fac51 100644 } static bool is_dcl_instruction(enum vkd3d_shader_opcode handler_idx) -@@ -9660,9 +9156,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9607,33 +9163,12 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_DCL_GLOBAL_FLAGS: + spirv_compiler_emit_dcl_global_flags(compiler, instruction); + break; +- case VKD3DSIH_DCL_TEMPS: +- spirv_compiler_emit_dcl_temps(compiler, instruction); +- break; + case VKD3DSIH_DCL_INDEXABLE_TEMP: + spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); + break; +- case VKD3DSIH_DCL_CONSTANT_BUFFER: +- spirv_compiler_emit_dcl_constant_buffer(compiler, instruction); +- break; + case VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER: + spirv_compiler_emit_dcl_immediate_constant_buffer(compiler, instruction); + break; +- case VKD3DSIH_DCL_SAMPLER: +- spirv_compiler_emit_dcl_sampler(compiler, instruction); +- break; +- case VKD3DSIH_DCL: +- case VKD3DSIH_DCL_UAV_TYPED: +- spirv_compiler_emit_dcl_resource(compiler, instruction); +- break; +- case VKD3DSIH_DCL_RESOURCE_RAW: +- case VKD3DSIH_DCL_UAV_RAW: +- spirv_compiler_emit_dcl_resource_raw(compiler, instruction); +- break; +- case VKD3DSIH_DCL_RESOURCE_STRUCTURED: +- case VKD3DSIH_DCL_UAV_STRUCTURED: +- spirv_compiler_emit_dcl_resource_structured(compiler, instruction); +- break; + case VKD3DSIH_DCL_TGSM_RAW: + spirv_compiler_emit_dcl_tgsm_raw(compiler, instruction); + break; +@@ -9660,9 +9195,6 @@ static int 
spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_OUTPUT_SIV: spirv_compiler_emit_dcl_output_siv(compiler, instruction); break; @@ -22046,7 +30835,7 @@ index 53e13735937..3542b5fac51 100644 case VKD3DSIH_DCL_STREAM: spirv_compiler_emit_dcl_stream(compiler, instruction); break; -@@ -9699,10 +9192,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9699,10 +9231,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_THREAD_GROUP: spirv_compiler_emit_dcl_thread_group(compiler, instruction); break; @@ -22057,7 +30846,7 @@ index 53e13735937..3542b5fac51 100644 case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: -@@ -9826,6 +9315,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9826,6 +9354,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_CONTINUE: case VKD3DSIH_CONTINUEP: case VKD3DSIH_DEFAULT: @@ -22065,11 +30854,72 @@ index 53e13735937..3542b5fac51 100644 case VKD3DSIH_ELSE: case VKD3DSIH_ENDIF: case VKD3DSIH_ENDLOOP: -@@ -9947,28 +9437,55 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -9935,7 +9464,16 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_CUT_STREAM: + spirv_compiler_emit_cut_stream(compiler, instruction); + break; ++ case VKD3DSIH_DCL: ++ case VKD3DSIH_DCL_CONSTANT_BUFFER: + case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: ++ case VKD3DSIH_DCL_RESOURCE_RAW: ++ case VKD3DSIH_DCL_RESOURCE_STRUCTURED: ++ case VKD3DSIH_DCL_SAMPLER: ++ case VKD3DSIH_DCL_TEMPS: ++ case VKD3DSIH_DCL_UAV_RAW: ++ case VKD3DSIH_DCL_UAV_STRUCTURED: ++ case VKD3DSIH_DCL_UAV_TYPED: + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_NOP: + /* nothing to do */ +@@ -9947,28 +9485,102 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, return ret; } 
-int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, ++static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < compiler->scan_descriptor_info->descriptor_count; ++i) ++ { ++ const struct vkd3d_shader_descriptor_info1 *descriptor = &compiler->scan_descriptor_info->descriptors[i]; ++ struct vkd3d_shader_register_range range; ++ ++ range.first = descriptor->register_index; ++ if (descriptor->count == ~0u) ++ range.last = ~0u; ++ else ++ range.last = descriptor->register_index + descriptor->count - 1; ++ range.space = descriptor->register_space; ++ ++ switch (descriptor->type) ++ { ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: ++ spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor->register_id); ++ break; ++ ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: ++ spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor->register_id, descriptor->buffer_size); ++ break; ++ ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: ++ spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, ++ descriptor->sample_count, false, descriptor->resource_type, descriptor->resource_data_type, ++ descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); ++ break; ++ ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: ++ spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, ++ descriptor->sample_count, true, descriptor->resource_type, descriptor->resource_data_type, ++ descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ } ++} ++ +static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, struct vkd3d_shader_code *spirv) @@ -22079,29 +30929,32 @@ index 53e13735937..3542b5fac51 100644 const struct 
vkd3d_shader_spirv_domain_shader_target_info *ds_info; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_phase *phase; ++ struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; + struct vkd3d_shader_instruction_array instructions; enum vkd3d_result result = VKD3D_OK; unsigned int i; ++ if (parser->shader_desc.temp_count) ++ spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); ++ ++ spirv_compiler_emit_descriptor_declarations(compiler); ++ compiler->location.column = 0; - for (i = 0; i < instructions->count; ++i) + compiler->location.line = 1; + ++ if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) ++ return result; ++ + instructions = parser->instructions; + memset(&parser->instructions, 0, sizeof(parser->instructions)); + -+ if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL -+ && (result = instruction_array_flatten_hull_shader_phases(&instructions)) >= 0) -+ { -+ result = instruction_array_normalise_hull_shader_control_point_io(&instructions, -+ &compiler->input_signature); -+ } -+ if (result >= 0) -+ result = instruction_array_normalise_io_registers(&instructions, parser->shader_version.type, -+ &compiler->input_signature, &compiler->output_signature, &compiler->patch_constant_signature); -+ -+ if (result >= 0 && TRACE_ON()) -+ vkd3d_shader_trace(&instructions, &parser->shader_version); ++ compiler->input_signature = shader_desc->input_signature; ++ compiler->output_signature = shader_desc->output_signature; ++ compiler->patch_constant_signature = shader_desc->patch_constant_signature; ++ memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); ++ memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); ++ memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); + + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) + spirv_compiler_emit_shader_signature_outputs(compiler); @@ -22129,13 
+30982,13 @@ index 53e13735937..3542b5fac51 100644 else vkd3d_spirv_build_op_function_end(builder); -@@ -10023,23 +9540,23 @@ int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +@@ -10023,23 +9635,23 @@ int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, return VKD3D_OK; } -void spirv_compiler_destroy(struct spirv_compiler *compiler) +int spirv_compile(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, ++ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { @@ -22170,10 +31023,10 @@ index 53e13735937..3542b5fac51 100644 } diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c new file mode 100644 -index 00000000000..d066b13ee4e +index 00000000000..045fb6c5f64 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -0,0 +1,5234 @@ +@@ -0,0 +1,5619 @@ +/* + * TPF (Direct3D shader models 4 and 5 bytecode) support + * @@ -22606,6 +31459,8 @@ index 00000000000..d066b13ee4e + VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, + VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, + VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, ++ ++ VKD3D_SM4_REGISTER_TYPE_COUNT, +}; + +enum vkd3d_sm4_extended_operand_type @@ -22681,7 +31536,7 @@ index 00000000000..d066b13ee4e + +enum vkd3d_sm4_swizzle_type +{ -+ VKD3D_SM4_SWIZZLE_NONE = 0x0, ++ VKD3D_SM4_SWIZZLE_NONE = 0x0, /* swizzle bitfield contains a mask */ + VKD3D_SM4_SWIZZLE_VEC4 = 0x1, + VKD3D_SM4_SWIZZLE_SCALAR = 0x2, +}; @@ -22747,6 +31602,12 @@ index 00000000000..d066b13ee4e + struct sm4_index_range ranges[MAX_REG_OUTPUT * 2]; +}; + ++struct vkd3d_sm4_lookup_tables ++{ ++ const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; ++ const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; ++}; 
++ +struct vkd3d_shader_sm4_parser +{ + const uint32_t *start, *end, *ptr; @@ -22763,6 +31624,8 @@ index 00000000000..d066b13ee4e + struct sm4_index_range_array output_index_ranges; + struct sm4_index_range_array patch_constant_index_ranges; + ++ struct vkd3d_sm4_lookup_tables lookup; ++ + struct vkd3d_shader_parser p; +}; + @@ -22873,6 +31736,19 @@ index 00000000000..d066b13ee4e + VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; +} + ++static void shader_sm4_read_case_condition(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, ++ (struct vkd3d_shader_src_param *)&ins->src[0]); ++ if (ins->src[0].reg.type != VKD3DSPR_IMMCONST) ++ { ++ FIXME("Switch case value is not a 32-bit constant.\n"); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE, ++ "Switch case value is not a 32-bit immediate constant register."); ++ } ++} ++ +static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ @@ -23165,6 +32041,8 @@ index 00000000000..d066b13ee4e + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.count = *tokens; ++ if (opcode == VKD3D_SM4_OP_DCL_TEMPS) ++ priv->p.shader_desc.temp_count = max(priv->p.shader_desc.temp_count, *tokens); +} + +static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -23379,7 +32257,8 @@ index 00000000000..d066b13ee4e + {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, + {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", + shader_sm4_read_conditional_op}, -+ {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, ++ {VKD3D_SM4_OP_CASE, 
VKD3DSIH_CASE, "", "u", ++ shader_sm4_read_case_condition}, + {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, + {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", + shader_sm4_read_conditional_op}, @@ -23642,50 +32521,10 @@ index 00000000000..d066b13ee4e + {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, +}; + -+static const enum vkd3d_shader_register_type register_type_table[] = ++struct vkd3d_sm4_register_type_info +{ -+ /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, -+ /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, -+ /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, -+ /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, -+ /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, -+ /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, -+ /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, -+ /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, -+ /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, -+ /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, -+ /* UNKNOWN */ ~0u, -+ /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, -+ /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, -+ /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, -+ /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, -+ /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, -+ /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, -+ /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, -+ /* UNKNOWN */ ~0u, -+ /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, -+ /* UNKNOWN */ ~0u, -+ /* UNKNOWN */ ~0u, -+ /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, -+ /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, -+ /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, -+ /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, -+ /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, -+ /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, -+ /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, -+ /* UNKNOWN */ ~0u, -+ /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, -+ /* 
VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, -+ /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, -+ /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, -+ /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, -+ /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, -+ /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, -+ /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, -+ /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, -+ /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, -+ /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, -+ /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, ++ enum vkd3d_sm4_register_type sm4_type; ++ enum vkd3d_shader_register_type vkd3d_type; +}; + +static const enum vkd3d_shader_register_precision register_precision_table[] = @@ -23698,18 +32537,104 @@ index 00000000000..d066b13ee4e + /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, +}; + ++struct tpf_writer ++{ ++ struct hlsl_ctx *ctx; ++ struct vkd3d_bytecode_buffer *buffer; ++ struct vkd3d_sm4_lookup_tables lookup; ++}; ++ +static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) +{ + unsigned int i; + + for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) + { -+ if (opcode == opcode_table[i].opcode) return &opcode_table[i]; ++ if (opcode == opcode_table[i].opcode) ++ return &opcode_table[i]; + } + + return NULL; +} + ++static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) ++{ ++ const struct vkd3d_sm4_register_type_info *info; ++ unsigned int i; ++ ++ static const struct vkd3d_sm4_register_type_info register_type_table[] = ++ { ++ {VKD3D_SM4_RT_TEMP, VKD3DSPR_TEMP}, ++ {VKD3D_SM4_RT_INPUT, VKD3DSPR_INPUT}, ++ {VKD3D_SM4_RT_OUTPUT, VKD3DSPR_OUTPUT}, ++ {VKD3D_SM4_RT_INDEXABLE_TEMP, VKD3DSPR_IDXTEMP}, ++ {VKD3D_SM4_RT_IMMCONST, VKD3DSPR_IMMCONST}, ++ {VKD3D_SM4_RT_IMMCONST64, VKD3DSPR_IMMCONST64}, ++ {VKD3D_SM4_RT_SAMPLER, 
VKD3DSPR_SAMPLER}, ++ {VKD3D_SM4_RT_RESOURCE, VKD3DSPR_RESOURCE}, ++ {VKD3D_SM4_RT_CONSTBUFFER, VKD3DSPR_CONSTBUFFER}, ++ {VKD3D_SM4_RT_IMMCONSTBUFFER, VKD3DSPR_IMMCONSTBUFFER}, ++ {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID}, ++ {VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT}, ++ {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL}, ++ {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER}, ++ {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK}, ++ {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM}, ++ {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY}, ++ {VKD3D_SM5_RT_FUNCTION_POINTER, VKD3DSPR_FUNCTIONPOINTER}, ++ {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID, VKD3DSPR_OUTPOINTID}, ++ {VKD3D_SM5_RT_FORK_INSTANCE_ID, VKD3DSPR_FORKINSTID}, ++ {VKD3D_SM5_RT_JOIN_INSTANCE_ID, VKD3DSPR_JOININSTID}, ++ {VKD3D_SM5_RT_INPUT_CONTROL_POINT, VKD3DSPR_INCONTROLPOINT}, ++ {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT, VKD3DSPR_OUTCONTROLPOINT}, ++ {VKD3D_SM5_RT_PATCH_CONSTANT_DATA, VKD3DSPR_PATCHCONST}, ++ {VKD3D_SM5_RT_DOMAIN_LOCATION, VKD3DSPR_TESSCOORD}, ++ {VKD3D_SM5_RT_UAV, VKD3DSPR_UAV}, ++ {VKD3D_SM5_RT_SHARED_MEMORY, VKD3DSPR_GROUPSHAREDMEM}, ++ {VKD3D_SM5_RT_THREAD_ID, VKD3DSPR_THREADID}, ++ {VKD3D_SM5_RT_THREAD_GROUP_ID, VKD3DSPR_THREADGROUPID}, ++ {VKD3D_SM5_RT_LOCAL_THREAD_ID, VKD3DSPR_LOCALTHREADID}, ++ {VKD3D_SM5_RT_COVERAGE, VKD3DSPR_COVERAGE}, ++ {VKD3D_SM5_RT_LOCAL_THREAD_INDEX, VKD3DSPR_LOCALTHREADINDEX}, ++ {VKD3D_SM5_RT_GS_INSTANCE_ID, VKD3DSPR_GSINSTID}, ++ {VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL, VKD3DSPR_DEPTHOUTGE}, ++ {VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL, VKD3DSPR_DEPTHOUTLE}, ++ {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF}, ++ }; ++ ++ memset(lookup, 0, sizeof(*lookup)); ++ ++ for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) ++ { ++ info = &register_type_table[i]; ++ lookup->register_type_info_from_sm4[info->sm4_type] = info; ++ lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; ++ } ++} ++ ++static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) 
++{ ++ tpf->ctx = ctx; ++ tpf->buffer = buffer; ++ init_sm4_lookup_tables(&tpf->lookup); ++} ++ ++static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( ++ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type) ++{ ++ if (sm4_type >= VKD3D_SM4_REGISTER_TYPE_COUNT) ++ return NULL; ++ return lookup->register_type_info_from_sm4[sm4_type]; ++} ++ ++static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_type( ++ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) ++{ ++ if (vkd3d_type >= VKD3DSPR_COUNT) ++ return NULL; ++ return lookup->register_type_info_from_vkd3d[vkd3d_type]; ++} ++ +static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) +{ + switch (sm4->p.shader_version.type) @@ -23816,6 +32741,7 @@ index 00000000000..d066b13ee4e +static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, + enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) +{ ++ const struct vkd3d_sm4_register_type_info *register_type_info; + enum vkd3d_sm4_register_precision precision; + enum vkd3d_sm4_register_type register_type; + enum vkd3d_sm4_extended_operand_type type; @@ -23830,15 +32756,15 @@ index 00000000000..d066b13ee4e + token = *(*ptr)++; + + register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; -+ if (register_type >= ARRAY_SIZE(register_type_table) -+ || register_type_table[register_type] == VKD3DSPR_INVALID) ++ register_type_info = get_info_from_sm4_register_type(&priv->lookup, register_type); ++ if (!register_type_info) + { + FIXME("Unhandled register type %#x.\n", register_type); + param->type = VKD3DSPR_TEMP; + } + else + { -+ param->type = register_type_table[register_type]; ++ param->type = register_type_info->vkd3d_type; + } + param->precision = 
VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + param->non_uniform = false; @@ -24129,6 +33055,7 @@ index 00000000000..d066b13ee4e +static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) +{ ++ unsigned int dimension, mask; + DWORD token; + + if (*ptr >= end) @@ -24144,37 +33071,63 @@ index 00000000000..d066b13ee4e + return false; + } + -+ if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) ++ switch ((dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT)) + { -+ src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ } -+ else -+ { -+ enum vkd3d_sm4_swizzle_type swizzle_type = -+ (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; ++ case VKD3D_SM4_DIMENSION_NONE: ++ case VKD3D_SM4_DIMENSION_SCALAR: ++ src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ break; + -+ switch (swizzle_type) ++ case VKD3D_SM4_DIMENSION_VEC4: + { -+ case VKD3D_SM4_SWIZZLE_NONE: -+ if (shader_sm4_is_scalar_register(&src_param->reg)) -+ src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ else ++ enum vkd3d_sm4_swizzle_type swizzle_type = ++ (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; ++ ++ switch (swizzle_type) ++ { ++ case VKD3D_SM4_SWIZZLE_NONE: + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ break; + -+ case VKD3D_SM4_SWIZZLE_SCALAR: -+ src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; -+ src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; -+ break; ++ mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; ++ /* Mask seems only to be used for vec4 constants and is always zero. 
*/ ++ if (!register_is_constant(&src_param->reg)) ++ { ++ FIXME("Source mask %#x is not for a constant.\n", mask); ++ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, ++ "Unhandled mask %#x for a non-constant source register.", mask); ++ } ++ else if (mask) ++ { ++ FIXME("Unhandled mask %#x.\n", mask); ++ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, ++ "Unhandled source register mask %#x.", mask); ++ } + -+ case VKD3D_SM4_SWIZZLE_VEC4: -+ src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); -+ break; ++ break; + -+ default: -+ FIXME("Unhandled swizzle type %#x.\n", swizzle_type); -+ break; ++ case VKD3D_SM4_SWIZZLE_SCALAR: ++ src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; ++ src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; ++ break; ++ ++ case VKD3D_SM4_SWIZZLE_VEC4: ++ src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); ++ break; ++ ++ default: ++ FIXME("Unhandled swizzle type %#x.\n", swizzle_type); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE, ++ "Source register swizzle type %#x is invalid.", swizzle_type); ++ break; ++ } ++ break; + } ++ ++ default: ++ FIXME("Unhandled dimension %#x.\n", dimension); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION, ++ "Source register dimension %#x is invalid.", dimension); ++ break; + } + + if (register_is_input_output(&src_param->reg) && !shader_sm4_validate_input_output_register(priv, @@ -24187,7 +33140,9 @@ index 00000000000..d066b13ee4e +static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) +{ ++ enum vkd3d_sm4_swizzle_type swizzle_type; + enum vkd3d_shader_src_modifier modifier; ++ 
unsigned int dimension, swizzle; + DWORD token; + + if (*ptr >= end) @@ -24209,10 +33164,53 @@ index 00000000000..d066b13ee4e + return false; + } + -+ dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; ++ switch ((dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT)) ++ { ++ case VKD3D_SM4_DIMENSION_NONE: ++ dst_param->write_mask = 0; ++ break; ++ ++ case VKD3D_SM4_DIMENSION_SCALAR: ++ dst_param->write_mask = VKD3DSP_WRITEMASK_0; ++ break; ++ ++ case VKD3D_SM4_DIMENSION_VEC4: ++ swizzle_type = (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; ++ switch (swizzle_type) ++ { ++ case VKD3D_SM4_SWIZZLE_NONE: ++ dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; ++ break; ++ ++ case VKD3D_SM4_SWIZZLE_VEC4: ++ swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); ++ if (swizzle != VKD3D_SHADER_NO_SWIZZLE) ++ { ++ FIXME("Unhandled swizzle %#x.\n", swizzle); ++ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_SWIZZLE, ++ "Unhandled destination register swizzle %#x.", swizzle); ++ } ++ dst_param->write_mask = VKD3DSP_WRITEMASK_ALL; ++ break; ++ ++ default: ++ FIXME("Unhandled swizzle type %#x.\n", swizzle_type); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE, ++ "Destination register swizzle type %#x is invalid.", swizzle_type); ++ break; ++ } ++ break; ++ ++ default: ++ FIXME("Unhandled dimension %#x.\n", dimension); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION, ++ "Destination register dimension %#x is invalid.", dimension); ++ break; ++ } ++ + if (data_type == VKD3D_DATA_DOUBLE) + dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); -+ /* Scalar registers are declared with no write mask in shader bytecode. 
*/ ++ /* Some scalar registers are declared with no write mask in shader bytecode. */ + if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) + dst_param->write_mask = VKD3DSP_WRITEMASK_0; + dst_param->modifiers = 0; @@ -24538,6 +33536,8 @@ index 00000000000..d066b13ee4e + sm4->output_map[e->register_index] = e->semantic_index; + } + ++ init_sm4_lookup_tables(&sm4->lookup); ++ + return true; +} + @@ -24618,6 +33618,7 @@ index 00000000000..d066b13ee4e + } + + shader_desc = &sm4->p.shader_desc; ++ shader_desc->is_dxil = false; + if ((ret = shader_extract_from_dxbc(&compile_info->source, + message_context, compile_info->source_name, shader_desc)) < 0) + { @@ -24675,7 +33676,7 @@ index 00000000000..d066b13ee4e + return sm4->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; +} + -+static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); ++static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); + +static bool type_is_integer(const struct hlsl_type *type) +{ @@ -24692,7 +33693,7 @@ index 00000000000..d066b13ee4e +} + +bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -+ bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) ++ bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) +{ + unsigned int i; + @@ -24702,24 +33703,24 @@ index 00000000000..d066b13ee4e + bool output; + enum vkd3d_shader_type shader_type; + enum vkd3d_sm4_swizzle_type swizzle_type; -+ enum vkd3d_sm4_register_type type; ++ enum vkd3d_shader_register_type type; + bool has_idx; + } + register_table[] = + { -+ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, -+ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, -+ 
{"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, ++ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADID, false}, ++ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADGROUPID, false}, ++ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_LOCALTHREADID, false}, + -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3DSPR_PRIMID, false}, + + /* Put sv_target in this table, instead of letting it fall through to + * default varying allocation, so that the register index matches the + * usage index. */ -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) @@ -24728,7 +33729,8 @@ index 00000000000..d066b13ee4e + && output == register_table[i].output + && ctx->profile->type == register_table[i].shader_type) + { -+ *type = register_table[i].type; ++ if (type) ++ *type = register_table[i].type; + if (swizzle_type) + *swizzle_type = register_table[i].swizzle_type; + *has_idx = 
register_table[i].has_idx; @@ -24800,7 +33802,8 @@ index 00000000000..d066b13ee4e + return true; +} + -+static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) ++static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, ++ uint32_t tag, struct vkd3d_bytecode_buffer *buffer) +{ + /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN + * sections to be aligned. Without this, the sections themselves will be @@ -24808,6 +33811,9 @@ index 00000000000..d066b13ee4e + size_t size = bytecode_align(buffer); + + dxbc_writer_add_section(dxbc, tag, buffer->data, size); ++ ++ if (buffer->status < 0) ++ ctx->result = buffer->status; +} + +static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) @@ -24825,7 +33831,6 @@ index 00000000000..d066b13ee4e + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; -+ enum vkd3d_sm4_register_type type; + uint32_t usage_idx, reg_idx; + D3D_NAME usage; + bool has_idx; @@ -24839,14 +33844,13 @@ index 00000000000..d066b13ee4e + continue; + usage_idx = var->semantic.index; + -+ if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) ++ if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, NULL, &has_idx)) + { + reg_idx = has_idx ? var->semantic.index : ~0u; + } + else + { + assert(var->regs[HLSL_REGSET_NUMERIC].allocated); -+ type = VKD3D_SM4_RT_INPUT; + reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; + } + @@ -24915,7 +33919,7 @@ index 00000000000..d066b13ee4e + + set_u32(&buffer, count_position, i); + -+ add_section(dxbc, output ? TAG_OSGN : TAG_ISGN, &buffer); ++ add_section(ctx, dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); +} + +static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) @@ -25003,6 +34007,22 @@ index 00000000000..d066b13ee4e + return D3D_SVT_VERTEXSHADER; + case HLSL_TYPE_VOID: + return D3D_SVT_VOID; ++ case HLSL_TYPE_UAV: ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return D3D_SVT_RWTEXTURE1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return D3D_SVT_RWTEXTURE2D; ++ case HLSL_SAMPLER_DIM_3D: ++ return D3D_SVT_RWTEXTURE3D; ++ case HLSL_SAMPLER_DIM_1DARRAY: ++ return D3D_SVT_RWTEXTURE1DARRAY; ++ case HLSL_SAMPLER_DIM_2DARRAY: ++ return D3D_SVT_RWTEXTURE2DARRAY; ++ default: ++ vkd3d_unreachable(); ++ } + default: + vkd3d_unreachable(); + } @@ -25143,47 +34163,154 @@ index 00000000000..d066b13ee4e + } +} + ++struct extern_resource ++{ ++ /* var is only not NULL if this resource is a whole variable, so it may be responsible for more ++ * than one component. */ ++ const struct hlsl_ir_var *var; ++ ++ char *name; ++ struct hlsl_type *data_type; ++ bool is_user_packed; ++ ++ enum hlsl_regset regset; ++ unsigned int id, bind_count; ++}; ++ +static int sm4_compare_extern_resources(const void *a, const void *b) +{ -+ const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; -+ const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; -+ enum hlsl_regset aa_regset, bb_regset; ++ const struct extern_resource *aa = (const struct extern_resource *)a; ++ const struct extern_resource *bb = (const struct extern_resource *)b; ++ int r; + -+ aa_regset = hlsl_type_get_regset(aa->data_type); -+ bb_regset = hlsl_type_get_regset(bb->data_type); ++ if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) ++ return r; + -+ if (aa_regset != bb_regset) -+ return aa_regset - bb_regset; -+ -+ return aa->regs[aa_regset].id - bb->regs[bb_regset].id; ++ return vkd3d_u32_compare(aa->id, bb->id); +} + -+static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) ++static void 
sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) +{ -+ const struct hlsl_ir_var **extern_resources = NULL; ++ unsigned int i; ++ ++ for (i = 0; i < count; ++i) ++ vkd3d_free(extern_resources[i].name); ++ vkd3d_free(extern_resources); ++} ++ ++static const char *string_skip_tag(const char *string) ++{ ++ if (!strncmp(string, "", strlen(""))) ++ return string + strlen(""); ++ return string; ++} ++ ++static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) ++{ ++ bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; ++ struct extern_resource *extern_resources = NULL; + const struct hlsl_ir_var *var; + enum hlsl_regset regset; + size_t capacity = 0; ++ char *name; + + *count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { -+ if (!hlsl_type_is_resource(var->data_type)) -+ continue; -+ regset = hlsl_type_get_regset(var->data_type); -+ if (!var->regs[regset].allocated) -+ continue; -+ -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -+ sizeof(*extern_resources)))) ++ if (separate_components) + { -+ *count = 0; -+ return NULL; -+ } ++ unsigned int component_count = hlsl_type_component_count(var->data_type); ++ unsigned int k, regset_offset; + -+ extern_resources[*count] = var; -+ ++*count; ++ for (k = 0; k < component_count; ++k) ++ { ++ struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); ++ struct vkd3d_string_buffer *name_buffer; ++ ++ if (!hlsl_type_is_resource(component_type)) ++ continue; ++ ++ regset = hlsl_type_get_regset(component_type); ++ regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, regset, k); ++ ++ if (regset_offset > var->regs[regset].allocation_size) ++ continue; ++ ++ if (var->objects_usage[regset][regset_offset].used) ++ { ++ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count 
+ 1, ++ sizeof(*extern_resources)))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ ++ if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ hlsl_release_string_buffer(ctx, name_buffer); ++ return NULL; ++ } ++ hlsl_release_string_buffer(ctx, name_buffer); ++ ++ extern_resources[*count].var = NULL; ++ ++ extern_resources[*count].name = name; ++ extern_resources[*count].data_type = component_type; ++ extern_resources[*count].is_user_packed = false; ++ ++ extern_resources[*count].regset = regset; ++ extern_resources[*count].id = var->regs[regset].id + regset_offset; ++ extern_resources[*count].bind_count = 1; ++ ++ ++*count; ++ } ++ } ++ } ++ else ++ { ++ if (!hlsl_type_is_resource(var->data_type)) ++ continue; ++ regset = hlsl_type_get_regset(var->data_type); ++ if (!var->regs[regset].allocated) ++ continue; ++ ++ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, ++ sizeof(*extern_resources)))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ ++ if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ ++ extern_resources[*count].var = var; ++ ++ extern_resources[*count].name = name; ++ extern_resources[*count].data_type = var->data_type; ++ extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; ++ ++ extern_resources[*count].regset = regset; ++ extern_resources[*count].id = var->regs[regset].id; ++ extern_resources[*count].bind_count = var->bind_count[regset]; ++ ++ ++*count; ++ } + } + + qsort(extern_resources, *count, sizeof(*extern_resources), 
sm4_compare_extern_resources); @@ -25196,8 +34323,8 @@ index 00000000000..d066b13ee4e + size_t cbuffers_offset, resources_offset, creator_offset, string_offset; + size_t cbuffer_position, resource_position, creator_position; + const struct hlsl_profile_info *profile = ctx->profile; -+ const struct hlsl_ir_var **extern_resources; + struct vkd3d_bytecode_buffer buffer = {0}; ++ struct extern_resource *extern_resources; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + @@ -25251,18 +34378,15 @@ index 00000000000..d066b13ee4e + + for (i = 0; i < extern_resources_count; ++i) + { -+ enum hlsl_regset regset; ++ const struct extern_resource *resource = &extern_resources[i]; + uint32_t flags = 0; + -+ var = extern_resources[i]; -+ regset = hlsl_type_get_regset(var->data_type); -+ -+ if (var->reg_reservation.reg_type) ++ if (resource->is_user_packed) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ -+ put_u32(&buffer, sm4_resource_type(var->data_type)); -+ if (regset == HLSL_REGSET_SAMPLERS) ++ put_u32(&buffer, sm4_resource_type(resource->data_type)); ++ if (resource->regset == HLSL_REGSET_SAMPLERS) + { + put_u32(&buffer, 0); + put_u32(&buffer, 0); @@ -25270,15 +34394,15 @@ index 00000000000..d066b13ee4e + } + else + { -+ unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; ++ unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource_format->dimx; + -+ put_u32(&buffer, sm4_resource_format(var->data_type)); -+ put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); ++ put_u32(&buffer, sm4_resource_format(resource->data_type)); ++ put_u32(&buffer, sm4_rdef_resource_dimension(resource->data_type)); + put_u32(&buffer, ~0u); /* FIXME: multisample count */ + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + } -+ put_u32(&buffer, var->regs[regset].id); -+ put_u32(&buffer, var->regs[regset].bind_count); ++ put_u32(&buffer, resource->id); ++ 
put_u32(&buffer, resource->bind_count); + put_u32(&buffer, flags); + } + @@ -25304,9 +34428,9 @@ index 00000000000..d066b13ee4e + + for (i = 0; i < extern_resources_count; ++i) + { -+ var = extern_resources[i]; ++ const struct extern_resource *resource = &extern_resources[i]; + -+ string_offset = put_string(&buffer, var->name); ++ string_offset = put_string(&buffer, resource->name); + set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); + } + @@ -25410,9 +34534,9 @@ index 00000000000..d066b13ee4e + creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(&buffer, creator_position, creator_offset); + -+ add_section(dxbc, TAG_RDEF, &buffer); ++ add_section(ctx, dxbc, TAG_RDEF, &buffer); + -+ vkd3d_free(extern_resources); ++ sm4_free_extern_resources(extern_resources, extern_resources_count); +} + +static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) @@ -25484,8 +34608,8 @@ index 00000000000..d066b13ee4e + +struct sm4_register +{ -+ enum vkd3d_sm4_register_type type; -+ uint32_t idx[2]; ++ enum vkd3d_shader_register_type type; ++ struct vkd3d_shader_register_index idx[2]; + unsigned int idx_count; + enum vkd3d_sm4_dimension dim; + uint32_t immconst_uint[4]; @@ -25522,8 +34646,9 @@ index 00000000000..d066b13ee4e + +static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, + unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, -+ const struct hlsl_deref *deref, const struct hlsl_type *data_type) ++ const struct hlsl_deref *deref) +{ ++ const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); + const struct hlsl_ir_var *var = deref->var; + + if (var->is_uniform) @@ -25532,37 +34657,37 @@ index 00000000000..d066b13ee4e + + if (regset == HLSL_REGSET_TEXTURES) + { -+ reg->type = VKD3D_SM4_RT_RESOURCE; ++ reg->type = VKD3DSPR_RESOURCE; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; 
-+ reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; -+ reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); -+ assert(deref->offset_regset == HLSL_REGSET_TEXTURES); ++ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ assert(regset == HLSL_REGSET_TEXTURES); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_UAVS) + { -+ reg->type = VKD3D_SM5_RT_UAV; ++ reg->type = VKD3DSPR_UAV; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -+ reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; -+ reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); -+ assert(deref->offset_regset == HLSL_REGSET_UAVS); ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ assert(regset == HLSL_REGSET_UAVS); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_SAMPLERS) + { -+ reg->type = VKD3D_SM4_RT_SAMPLER; ++ reg->type = VKD3DSPR_SAMPLER; + reg->dim = VKD3D_SM4_DIMENSION_NONE; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -+ reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; -+ reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); -+ assert(deref->offset_regset == HLSL_REGSET_SAMPLERS); ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ assert(regset == HLSL_REGSET_SAMPLERS); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } @@ -25571,12 +34696,12 @@ index 00000000000..d066b13ee4e + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; + + assert(data_type->class <= HLSL_CLASS_VECTOR); -+ reg->type = VKD3D_SM4_RT_CONSTBUFFER; ++ reg->type = VKD3DSPR_CONSTBUFFER; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -+ 
reg->idx[0] = var->buffer->reg.id; -+ reg->idx[1] = offset / 4; ++ reg->idx[0].offset = var->buffer->reg.id; ++ reg->idx[1].offset = offset / 4; + reg->idx_count = 2; + *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); + } @@ -25591,7 +34716,7 @@ index 00000000000..d066b13ee4e + + if (has_idx) + { -+ reg->idx[0] = var->semantic.index + offset / 4; ++ reg->idx[0].offset = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + @@ -25603,11 +34728,11 @@ index 00000000000..d066b13ee4e + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); -+ reg->type = VKD3D_SM4_RT_INPUT; ++ reg->type = VKD3DSPR_INPUT; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -+ reg->idx[0] = hlsl_reg.id; ++ reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } @@ -25622,11 +34747,11 @@ index 00000000000..d066b13ee4e + + if (has_idx) + { -+ reg->idx[0] = var->semantic.index + offset / 4; ++ reg->idx[0].offset = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + -+ if (reg->type == VKD3D_SM4_RT_DEPTHOUT) ++ if (reg->type == VKD3DSPR_DEPTHOUT) + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + else + reg->dim = VKD3D_SM4_DIMENSION_VEC4; @@ -25637,9 +34762,9 @@ index 00000000000..d066b13ee4e + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); -+ reg->type = VKD3D_SM4_RT_OUTPUT; ++ reg->type = VKD3DSPR_OUTPUT; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; -+ reg->idx[0] = hlsl_reg.id; ++ reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } @@ -25649,22 +34774,22 @@ index 00000000000..d066b13ee4e + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); -+ reg->type = VKD3D_SM4_RT_TEMP; ++ reg->type = VKD3DSPR_TEMP; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -+ reg->idx[0] = 
hlsl_reg.id; ++ reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } +} + +static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, -+ const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) ++ const struct hlsl_deref *deref, unsigned int map_writemask) +{ + unsigned int writemask; + -+ sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); ++ sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref); + if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +} @@ -25673,10 +34798,10 @@ index 00000000000..d066b13ee4e + enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) +{ + assert(instr->reg.allocated); -+ reg->type = VKD3D_SM4_RT_TEMP; ++ reg->type = VKD3DSPR_TEMP; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -+ reg->idx[0] = instr->reg.id; ++ reg->idx[0].offset = instr->reg.id; + reg->idx_count = 1; + *writemask = instr->reg.writemask; +} @@ -25692,7 +34817,7 @@ index 00000000000..d066b13ee4e + const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) +{ + src->swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -+ src->reg.type = VKD3D_SM4_RT_IMMCONST; ++ src->reg.type = VKD3DSPR_IMMCONST; + if (width == 1) + { + src->reg.dim = VKD3D_SM4_DIMENSION_SCALAR; @@ -25705,8 +34830,10 @@ index 00000000000..d066b13ee4e + src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; + for (i = 0; i < 4; ++i) + { -+ if (map_writemask & (1u << i)) ++ if ((map_writemask & (1u << i)) && (j < width)) + src->reg.immconst_uint[i] = value->u[j++].u; ++ else ++ src->reg.immconst_uint[i] = 0; + } + } +} @@ -25729,17 +34856,100 @@ index 00000000000..d066b13ee4e + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +} + -+static uint32_t 
sm4_encode_register(const struct sm4_register *reg) ++static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct sm4_dst_register *dst) +{ -+ return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) -+ | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) -+ | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); ++ const struct vkd3d_sm4_register_type_info *register_type_info; ++ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; ++ uint32_t sm4_reg_type, reg_dim; ++ uint32_t token = 0; ++ unsigned int j; ++ ++ register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, dst->reg.type); ++ if (!register_type_info) ++ { ++ FIXME("Unhandled vkd3d-shader register type %#x.\n", dst->reg.type); ++ sm4_reg_type = VKD3D_SM4_RT_TEMP; ++ } ++ else ++ { ++ sm4_reg_type = register_type_info->sm4_type; ++ } ++ ++ reg_dim = dst->reg.dim; ++ ++ token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; ++ token |= dst->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; ++ token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; ++ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) ++ token |= dst->writemask << VKD3D_SM4_WRITEMASK_SHIFT; ++ put_u32(buffer, token); ++ ++ for (j = 0; j < dst->reg.idx_count; ++j) ++ { ++ put_u32(buffer, dst->reg.idx[j].offset); ++ assert(!dst->reg.idx[j].rel_addr); ++ } ++} ++ ++static void sm4_write_src_register(const struct tpf_writer *tpf, const struct sm4_src_register *src) ++{ ++ const struct vkd3d_sm4_register_type_info *register_type_info; ++ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; ++ uint32_t sm4_reg_type, reg_dim; ++ uint32_t token = 0; ++ unsigned int j; ++ ++ register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, src->reg.type); ++ if (!register_type_info) ++ { ++ FIXME("Unhandled vkd3d-shader register type %#x.\n", src->reg.type); ++ sm4_reg_type = VKD3D_SM4_RT_TEMP; ++ } ++ else ++ { ++ sm4_reg_type = register_type_info->sm4_type; ++ } ++ ++ reg_dim = src->reg.dim; ++ ++ token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; 
++ token |= src->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; ++ token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; ++ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) ++ { ++ token |= (uint32_t)src->swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; ++ token |= src->swizzle << VKD3D_SM4_SWIZZLE_SHIFT; ++ } ++ if (src->reg.mod) ++ token |= VKD3D_SM4_EXTENDED_OPERAND; ++ put_u32(buffer, token); ++ ++ if (src->reg.mod) ++ put_u32(buffer, (src->reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) ++ | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); ++ ++ for (j = 0; j < src->reg.idx_count; ++j) ++ { ++ put_u32(buffer, src->reg.idx[j].offset); ++ assert(!src->reg.idx[j].rel_addr); ++ } ++ ++ if (src->reg.type == VKD3DSPR_IMMCONST) ++ { ++ put_u32(buffer, src->reg.immconst_uint[0]); ++ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) ++ { ++ put_u32(buffer, src->reg.immconst_uint[1]); ++ put_u32(buffer, src->reg.immconst_uint[2]); ++ put_u32(buffer, src->reg.immconst_uint[3]); ++ } ++ } +} + +static uint32_t sm4_register_order(const struct sm4_register *reg) +{ + uint32_t order = 1; -+ if (reg->type == VKD3D_SM4_RT_IMMCONST) ++ if (reg->type == VKD3DSPR_IMMCONST) + order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 
4 : 1; + order += reg->idx_count; + if (reg->mod) @@ -25747,8 +34957,9 @@ index 00000000000..d066b13ee4e + return order; +} + -+static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) ++static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) +{ ++ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t token = instr->opcode; + unsigned int size = 1, i, j; + @@ -25776,43 +34987,10 @@ index 00000000000..d066b13ee4e + } + + for (i = 0; i < instr->dst_count; ++i) -+ { -+ token = sm4_encode_register(&instr->dsts[i].reg); -+ if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) -+ token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; -+ put_u32(buffer, token); -+ -+ for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) -+ put_u32(buffer, instr->dsts[i].reg.idx[j]); -+ } ++ sm4_write_dst_register(tpf, &instr->dsts[i]); + + for (i = 0; i < instr->src_count; ++i) -+ { -+ token = sm4_encode_register(&instr->srcs[i].reg); -+ token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -+ token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; -+ if (instr->srcs[i].reg.mod) -+ token |= VKD3D_SM4_EXTENDED_OPERAND; -+ put_u32(buffer, token); -+ -+ if (instr->srcs[i].reg.mod) -+ put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) -+ | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); -+ -+ for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) -+ put_u32(buffer, instr->srcs[i].reg.idx[j]); -+ -+ if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) -+ { -+ put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); -+ if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) -+ { -+ put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); -+ put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); -+ put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); -+ } -+ } -+ } ++ sm4_write_src_register(tpf, &instr->srcs[i]); + + if (instr->byte_stride) + 
put_u32(buffer, instr->byte_stride); @@ -25848,67 +35026,75 @@ index 00000000000..d066b13ee4e + return true; +} + -+static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) ++static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) +{ + const struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, + + .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, -+ .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, -+ .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, ++ .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, ++ .srcs[0].reg.idx[0].offset = cbuffer->reg.id, ++ .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, + .srcs[0].reg.idx_count = 2, + .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, + .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), + .src_count = 1, + }; -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) ++static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) +{ -+ unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; ++ struct hlsl_type *component_type; ++ unsigned int i; + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + -+ .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, ++ .dsts[0].reg.type = VKD3DSPR_SAMPLER, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + }; + -+ if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) ++ component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); ++ ++ if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + instr.opcode |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; + -+ for (i = 0; i < count; ++i) ++ assert(resource->regset == HLSL_REGSET_SAMPLERS); ++ ++ for (i = 0; i < resource->bind_count; ++i) + { 
-+ if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) ++ if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue; + -+ instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id + i; -+ write_sm4_instruction(buffer, &instr); ++ instr.dsts[0].reg.idx[0].offset = resource->id + i; ++ write_sm4_instruction(tpf, &instr); + } +} + -+static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_ir_var *var, bool uav) ++static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, ++ bool uav) +{ + enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; -+ unsigned int i, count = var->data_type->reg_size[regset]; + struct hlsl_type *component_type; + struct sm4_instruction instr; ++ unsigned int i; + -+ component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); ++ assert(resource->regset == regset); + -+ for (i = 0; i < count; ++i) ++ component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); ++ ++ for (i = 0; i < resource->bind_count; ++i) + { -+ if (!var->objects_usage[regset][i].used) ++ if (resource->var && !resource->var->objects_usage[regset][i].used) + continue; + + instr = (struct sm4_instruction) + { -+ .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, -+ .dsts[0].reg.idx = {var->regs[regset].id + i}, ++ .dsts[0].reg.type = uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, ++ .dsts[0].reg.idx[0].offset = resource->id + i, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + @@ -25918,11 +35104,11 @@ index 00000000000..d066b13ee4e + + if (uav) + { -+ switch (var->data_type->sampler_dim) ++ switch (resource->data_type->sampler_dim) + { + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; -+ instr.byte_stride = var->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; ++ instr.byte_stride = resource->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; + break; + default: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; @@ -25941,13 +35127,13 @@ index 00000000000..d066b13ee4e + instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } +} + -+static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) ++static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) +{ -+ const struct hlsl_profile_info *profile = ctx->profile; ++ const struct hlsl_profile_info *profile = tpf->ctx->profile; + const bool output = var->is_output_semantic; + D3D_NAME usage; + bool has_idx; @@ -25958,11 +35144,11 @@ index 00000000000..d066b13ee4e + .dst_count = 1, + }; + -+ if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) ++ if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) + { + if (has_idx) + { -+ instr.dsts[0].reg.idx[0] = var->semantic.index; ++ instr.dsts[0].reg.idx[0].offset = var->semantic.index; + instr.dsts[0].reg.idx_count = 1; + } + else @@ -25973,16 +35159,16 @@ index 00000000000..d066b13ee4e + } + else + { -+ instr.dsts[0].reg.type = output ? 
VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; -+ instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; ++ instr.dsts[0].reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; ++ instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + instr.dsts[0].reg.idx_count = 1; + instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; + } + -+ if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) ++ if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) + instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + -+ hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); ++ hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); + if (usage == ~0u) + usage = D3D_NAME_UNDEFINED; + @@ -26042,10 +35228,10 @@ index 00000000000..d066b13ee4e + break; + } + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) ++static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) +{ + struct sm4_instruction instr = + { @@ -26055,33 +35241,35 @@ index 00000000000..d066b13ee4e + .idx_count = 1, + }; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) ++static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, + -+ .idx = {thread_count[0], thread_count[1], thread_count[2]}, ++ .idx[0] = thread_count[0], ++ .idx[1] = thread_count[1], ++ .idx[2] = thread_count[2], + .idx_count = 3, + }; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) ++static void write_sm4_ret(const struct tpf_writer *tpf) +{ + struct sm4_instruction instr = + { + 
.opcode = VKD3D_SM4_OP_RET, + }; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) +{ + struct sm4_instruction instr; @@ -26096,12 +35284,11 @@ index 00000000000..d066b13ee4e + instr.srcs[0].reg.mod = src_mod; + instr.src_count = 1; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, -+ enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, -+ const struct hlsl_ir_node *src) ++static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++ const struct hlsl_ir_node *dst, unsigned dst_idx, const struct hlsl_ir_node *src) +{ + struct sm4_instruction instr; + @@ -26111,7 +35298,7 @@ index 00000000000..d066b13ee4e + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); -+ instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; ++ instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; @@ -26119,10 +35306,10 @@ index 00000000000..d066b13ee4e + sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); + instr.src_count = 1; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct 
hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; @@ -26137,11 +35324,11 @@ index 00000000000..d066b13ee4e + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); + instr.src_count = 2; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + +/* dp# instructions don't map the swizzle. */ -+static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; @@ -26156,10 +35343,10 @@ index 00000000000..d066b13ee4e + sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, ++static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, + const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ @@ -26171,7 +35358,7 @@ index 00000000000..d066b13ee4e + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); -+ instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; ++ instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; @@ -26180,15 +35367,35 @@ index 00000000000..d066b13ee4e + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); + instr.src_count = 2; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_ld(struct hlsl_ctx *ctx, struct 
vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, ++static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++ const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, ++ const struct hlsl_ir_node *src3) ++{ ++ struct sm4_instruction instr; ++ ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = opcode; ++ ++ sm4_dst_from_node(&instr.dsts[0], dst); ++ instr.dst_count = 1; ++ ++ sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); ++ sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); ++ sm4_src_from_node(&instr.srcs[2], src3, instr.dsts[0].writemask); ++ instr.src_count = 3; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, + const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, + enum hlsl_sampler_dim dim) +{ ++ const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); + bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); @@ -26205,7 +35412,7 @@ index 00000000000..d066b13ee4e + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { -+ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } @@ -26228,7 +35435,7 @@ index 00000000000..d066b13ee4e + + sm4_src_from_node(&instr.srcs[0], coords, coords_writemask); + -+ sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, 
instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); + + instr.src_count = 2; + @@ -26243,13 +35450,13 @@ index 00000000000..d066b13ee4e + + memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); + instr.srcs[2].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -+ reg->type = VKD3D_SM4_RT_IMMCONST; ++ reg->type = VKD3DSPR_IMMCONST; + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + reg->immconst_uint[0] = index->value.u[0].u; + } -+ else if (ctx->profile->major_version == 4 && ctx->profile->minor_version == 0) ++ else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) + { -+ hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); ++ hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + } + else + { @@ -26259,13 +35466,11 @@ index 00000000000..d066b13ee4e + ++instr.src_count; + } + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_ir_resource_load *load) ++static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +{ -+ const struct hlsl_type *resource_type = load->resource.var->data_type; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + const struct hlsl_deref *resource = &load->resource; @@ -26308,7 +35513,7 @@ index 00000000000..d066b13ee4e + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { -+ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } @@ -26318,8 +35523,8 @@ index 00000000000..d066b13ee4e + 
instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); -+ sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 3; + + if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD @@ -26341,7 +35546,52 @@ index 00000000000..d066b13ee4e + ++instr.src_count; + } + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_ir_node *dst = &load->node; ++ struct sm4_instruction instr; ++ ++ assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); ++ ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; ++ if (dst->data_type->base_type == HLSL_TYPE_UINT) ++ instr.opcode |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; ++ ++ sm4_dst_from_node(&instr.dsts[0], dst); ++ instr.dst_count = 1; ++ ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], resource, instr.dsts[0].writemask); ++ instr.src_count = 1; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_ir_node *dst = &load->node; ++ struct sm4_instruction instr; ++ ++ assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); ++ ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = VKD3D_SM4_OP_RESINFO; ++ if (dst->data_type->base_type == HLSL_TYPE_UINT) ++ 
instr.opcode |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; ++ ++ sm4_dst_from_node(&instr.dsts[0], dst); ++ instr.dst_count = 1; ++ ++ sm4_src_from_node(&instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); ++ instr.src_count = 2; ++ ++ write_sm4_instruction(tpf, &instr); +} + +static bool type_is_float(const struct hlsl_type *type) @@ -26349,8 +35599,7 @@ index 00000000000..d066b13ee4e + return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; +} + -+static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, -+ struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, ++static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, + const struct hlsl_ir_node *arg, uint32_t mask) +{ + struct sm4_instruction instr; @@ -26363,16 +35612,15 @@ index 00000000000..d066b13ee4e + + sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); + instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -+ instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; ++ instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; + instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + instr.srcs[1].reg.immconst_uint[0] = mask; + instr.src_count = 2; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_cast(struct hlsl_ctx *ctx, -+ struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) ++static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) +{ + static const union + { @@ -26394,23 +35642,23 @@ index 00000000000..d066b13ee4e + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); ++ 
write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_UINT: -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: -+ write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); ++ write_sm4_cast_from_bool(tpf, expr, arg1, one.u); + break; + + case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); + break; + + default: @@ -26423,20 +35671,20 @@ index 00000000000..d066b13ee4e + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: -+ write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); ++ write_sm4_cast_from_bool(tpf, expr, arg1, 1); + break; + + case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); + break; + + default: @@ -26449,20 +35697,20 @@ index 00000000000..d066b13ee4e + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: -+ write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); ++ write_sm4_cast_from_bool(tpf, expr, arg1, 1); + break; + + case HLSL_TYPE_DOUBLE: 
-+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); + break; + + default: @@ -26471,7 +35719,7 @@ index 00000000000..d066b13ee4e + break; + + case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double."); + break; + + case HLSL_TYPE_BOOL: @@ -26481,35 +35729,35 @@ index 00000000000..d066b13ee4e + } +} + -+static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) ++static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, ++ const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; + -+ sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); ++ sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_expr(struct hlsl_ctx *ctx, -+ struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) ++static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) +{ + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_ir_node *arg2 = expr->operands[1].node; ++ const struct hlsl_ir_node *arg3 = expr->operands[2].node; + const struct hlsl_type *dst_type = expr->node.data_type; + struct vkd3d_string_buffer *dst_type_string; + + 
assert(expr->node.reg.allocated); + -+ if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) ++ if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) + return; + + switch (expr->op) @@ -26518,161 +35766,181 @@ index 00000000000..d066b13ee4e + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); + break; + + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP1_BIT_NOT: + assert(type_is_integer(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_CAST: -+ write_sm4_cast(ctx, buffer, expr); ++ write_sm4_cast(tpf, expr); + break; + + case HLSL_OP1_COS: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); ++ write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); + break; + + case HLSL_OP1_DSX: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); ++ break; ++ ++ case HLSL_OP1_DSX_COARSE: ++ assert(type_is_float(dst_type)); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); ++ break; ++ ++ case HLSL_OP1_DSX_FINE: ++ assert(type_is_float(dst_type)); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); ++ 
write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); ++ break; ++ ++ case HLSL_OP1_DSY_COARSE: ++ assert(type_is_float(dst_type)); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); ++ break; ++ ++ case HLSL_OP1_DSY_FINE: ++ assert(type_is_float(dst_type)); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_EXP2: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FLOOR: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FRACT: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOG2: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOGIC_NOT: + assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_NEG: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); + break; + + default: -+ hlsl_fixme(ctx, 
&expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP1_REINTERPRET: -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_OP1_ROUND: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_RSQ: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); + break; + + case HLSL_OP1_SAT: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV + | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), + &expr->node, arg1, 0); + break; + + case HLSL_OP1_SIN: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); ++ write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); + break; + + case HLSL_OP1_SQRT: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_TRUNC: + assert(type_is_float(dst_type)); -+ write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); + break; + + case HLSL_OP2_ADD: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + 
case HLSL_TYPE_UINT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); + break; + + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_BIT_AND: + assert(type_is_integer(dst_type)); -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_BIT_OR: + assert(type_is_integer(dst_type)); -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_BIT_XOR: + assert(type_is_integer(dst_type)); -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_DIV: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: -+ write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); ++ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); + break; + + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); + } + break; + @@ -26683,15 +35951,15 @@ index 00000000000..d066b13ee4e + switch (arg1->data_type->dimx) + { + case 4: -+ write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); ++ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, 
arg2); + break; + + case 3: -+ write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); ++ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); + break; + + case 2: -+ write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); ++ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); + break; + + case 1: @@ -26701,7 +35969,7 @@ index 00000000000..d066b13ee4e + break; + + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); + } + break; + @@ -26714,18 +35982,18 @@ index 00000000000..d066b13ee4e + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); + break; + + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", -+ debug_hlsl_type(ctx, src_type)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", ++ debug_hlsl_type(tpf->ctx, src_type)); + break; + } + break; @@ -26740,21 +36008,21 @@ index 00000000000..d066b13ee4e + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); ++ 
write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); + break; + + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", -+ debug_hlsl_type(ctx, src_type)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", ++ debug_hlsl_type(tpf->ctx, src_type)); + break; + } + break; @@ -26769,21 +36037,21 @@ index 00000000000..d066b13ee4e + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); + break; + + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", -+ debug_hlsl_type(ctx, src_type)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", ++ debug_hlsl_type(tpf->ctx, src_type)); + break; + } + break; @@ -26791,37 +36059,37 @@ index 00000000000..d066b13ee4e + + case HLSL_OP2_LOGIC_AND: + assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LOGIC_OR: + assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LSHIFT: + assert(type_is_integer(dst_type)); + assert(dst_type->base_type != HLSL_TYPE_BOOL); -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, 
arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_MAX: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); + break; + + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); + } + break; + @@ -26829,19 +36097,19 @@ index 00000000000..d066b13ee4e + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); + break; + + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); + } + break; + @@ -26849,11 +36117,11 @@ index 00000000000..d066b13ee4e + switch (dst_type->base_type) + { + case HLSL_TYPE_UINT: -+ write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); ++ 
write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); + break; + + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); + } + break; + @@ -26861,18 +36129,18 @@ index 00000000000..d066b13ee4e + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + /* Using IMUL instead of UMUL because we're taking the low + * bits, and the native compiler generates IMUL. */ -+ write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); ++ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); + break; + + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); + } + break; + @@ -26885,18 +36153,18 @@ index 00000000000..d066b13ee4e + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: -+ write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); + break; + + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", -+ debug_hlsl_type(ctx, src_type)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", ++ debug_hlsl_type(tpf->ctx, src_type)); + break; + } + break; @@ -26905,18 +36173,22 @@ index 
00000000000..d066b13ee4e + case HLSL_OP2_RSHIFT: + assert(type_is_integer(dst_type)); + assert(dst_type->base_type != HLSL_TYPE_BOOL); -+ write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, ++ write_sm4_binary_op(tpf, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, + &expr->node, arg1, arg2); + break; + ++ case HLSL_OP3_MOVC: ++ write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); ++ break; ++ + default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); + } + -+ hlsl_release_string_buffer(ctx, dst_type_string); ++ hlsl_release_string_buffer(tpf->ctx, dst_type_string); +} + -+static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) ++static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) +{ + struct sm4_instruction instr = + { @@ -26927,26 +36199,25 @@ index 00000000000..d066b13ee4e + assert(iff->condition.node->data_type->dimx == 1); + + sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + -+ write_sm4_block(ctx, buffer, &iff->then_block); ++ write_sm4_block(tpf, &iff->then_block); + + if (!list_empty(&iff->else_block.instrs)) + { + instr.opcode = VKD3D_SM4_OP_ELSE; + instr.src_count = 0; -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + -+ write_sm4_block(ctx, buffer, &iff->else_block); ++ write_sm4_block(tpf, &iff->else_block); + } + + instr.opcode = VKD3D_SM4_OP_ENDIF; + instr.src_count = 0; -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_jump(struct hlsl_ctx *ctx, -+ struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) 
++static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) +{ + struct sm4_instruction instr = {0}; + @@ -26956,19 +36227,13 @@ index 00000000000..d066b13ee4e + instr.opcode = VKD3D_SM4_OP_BREAK; + break; + -+ case HLSL_IR_JUMP_DISCARD: ++ case HLSL_IR_JUMP_DISCARD_NZ: + { -+ struct sm4_register *reg = &instr.srcs[0].reg; -+ + instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; + + memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); -+ instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + instr.src_count = 1; -+ reg->type = VKD3D_SM4_RT_IMMCONST; -+ reg->dim = VKD3D_SM4_DIMENSION_SCALAR; -+ reg->immconst_uint[0] = ~0u; -+ ++ sm4_src_from_node(&instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); + break; + } + @@ -26976,11 +36241,11 @@ index 00000000000..d066b13ee4e + vkd3d_unreachable(); + + default: -+ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); ++ hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + return; + } + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + +/* Does this variable's data come directly from the API user, rather than being @@ -26994,8 +36259,7 @@ index 00000000000..d066b13ee4e + return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; +} + -+static void write_sm4_load(struct hlsl_ctx *ctx, -+ struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) ++static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) +{ + const struct hlsl_type *type = load->node.data_type; + struct sm4_instruction instr; @@ -27006,7 +36270,7 @@ index 00000000000..d066b13ee4e + instr.dst_count = 1; + + assert(type->class <= HLSL_CLASS_LAST_NUMERIC); -+ if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(ctx, load->src.var)) ++ if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) + { + 
struct hlsl_constant_value value; + @@ -27015,7 +36279,7 @@ index 00000000000..d066b13ee4e + + instr.opcode = VKD3D_SM4_OP_MOVC; + -+ sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); + + memset(&value, 0xff, sizeof(value)); + sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); @@ -27027,33 +36291,31 @@ index 00000000000..d066b13ee4e + { + instr.opcode = VKD3D_SM4_OP_MOV; + -+ sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); + instr.src_count = 1; + } + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_loop(struct hlsl_ctx *ctx, -+ struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) ++static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_LOOP, + }; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + -+ write_sm4_block(ctx, buffer, &loop->body); ++ write_sm4_block(tpf, &loop->body); + + instr.opcode = VKD3D_SM4_OP_ENDLOOP; -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, -+ const struct hlsl_deref *resource, const struct hlsl_deref *sampler, -+ const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) ++static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, ++ const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords, ++ unsigned int swizzle, const struct 
hlsl_ir_node *texel_offset) +{ + struct sm4_src_register *src; + struct sm4_instruction instr; @@ -27071,9 +36333,9 @@ index 00000000000..d066b13ee4e + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { -+ if (ctx->profile->major_version < 5) ++ if (tpf->ctx->profile->major_version < 5) + { -+ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); + return; + } @@ -27082,58 +36344,39 @@ index 00000000000..d066b13ee4e + } + } + -+ sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[instr.src_count++], resource, instr.dsts[0].writemask); + + src = &instr.srcs[instr.src_count++]; -+ sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); ++ sm4_src_from_deref(tpf->ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); + src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; + src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; + src->swizzle = swizzle; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_resource_load(struct hlsl_ctx *ctx, -+ struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) ++static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +{ -+ const struct hlsl_type *resource_type = load->resource.var->data_type; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *sample_index = load->sample_index.node; + const struct hlsl_ir_node *coords = load->coords.node; + -+ if (!hlsl_type_is_resource(resource_type)) ++ if (load->sampler.var && !load->sampler.var->is_uniform) + { -+ hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); ++ 
hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable."); + return; + } + -+ if (load->sampler.var) -+ { -+ const struct hlsl_type *sampler_type = load->sampler.var->data_type; -+ -+ if (!hlsl_type_is_resource(sampler_type)) -+ { -+ hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); -+ return; -+ } -+ -+ if (!load->sampler.var->is_uniform) -+ { -+ hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); -+ return; -+ } -+ } -+ + if (!load->resource.var->is_uniform) + { -+ hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); ++ hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable."); + return; + } + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: -+ write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, ++ write_sm4_ld(tpf, &load->node, &load->resource, + coords, sample_index, texel_offset, load->sampling_dim); + break; + @@ -27143,64 +36386,61 @@ index 00000000000..d066b13ee4e + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + case HLSL_RESOURCE_SAMPLE_GRAD: -+ if (!load->sampler.var) -+ { -+ hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); -+ return; -+ } -+ write_sm4_sample(ctx, buffer, load); ++ /* Combined sample expressions were lowered. 
*/ ++ assert(load->sampler.var); ++ write_sm4_sample(tpf, load); + break; + + case HLSL_RESOURCE_GATHER_RED: -+ write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -+ &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); ++ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, ++ HLSL_SWIZZLE(X, X, X, X), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_GREEN: -+ write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -+ &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); ++ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, ++ HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_BLUE: -+ write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -+ &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); ++ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, ++ HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_ALPHA: -+ write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -+ &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); ++ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, ++ HLSL_SWIZZLE(W, W, W, W), texel_offset); ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_INFO: ++ write_sm4_sampleinfo(tpf, load); ++ break; ++ ++ case HLSL_RESOURCE_RESINFO: ++ write_sm4_resinfo(tpf, load); + break; + } +} + -+static void write_sm4_resource_store(struct hlsl_ctx *ctx, -+ struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) ++static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) +{ -+ const struct hlsl_type *resource_type = store->resource.var->data_type; -+ -+ if (!hlsl_type_is_resource(resource_type)) -+ { -+ hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); -+ return; -+ } ++ struct 
hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); + + if (!store->resource.var->is_uniform) + { -+ hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); ++ hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); + return; + } + + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { -+ hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.\n"); ++ hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); + return; + } + -+ write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); ++ write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); +} + -+static void write_sm4_store(struct hlsl_ctx *ctx, -+ struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) ++static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) +{ + const struct hlsl_ir_node *rhs = store->rhs.node; + struct sm4_instruction instr; @@ -27209,18 +36449,17 @@ index 00000000000..d066b13ee4e + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + -+ sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); ++ sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); + instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); + instr.src_count = 1; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_swizzle(struct hlsl_ctx *ctx, -+ struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) ++static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) +{ + struct sm4_instruction instr; + unsigned int writemask; @@ -27236,11 
+36475,10 @@ index 00000000000..d066b13ee4e + swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); + instr.src_count = 1; + -+ write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); +} + -+static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_block *block) ++static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) +{ + const struct hlsl_ir_node *instr; + @@ -27250,12 +36488,12 @@ index 00000000000..d066b13ee4e + { + if (instr->data_type->class == HLSL_CLASS_MATRIX) + { -+ hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); ++ hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); + break; + } + else if (instr->data_type->class == HLSL_CLASS_OBJECT) + { -+ hlsl_fixme(ctx, &instr->loc, "Object copy."); ++ hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); + break; + } + @@ -27275,43 +36513,43 @@ index 00000000000..d066b13ee4e + vkd3d_unreachable(); + + case HLSL_IR_EXPR: -+ write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); ++ write_sm4_expr(tpf, hlsl_ir_expr(instr)); + break; + + case HLSL_IR_IF: -+ write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); ++ write_sm4_if(tpf, hlsl_ir_if(instr)); + break; + + case HLSL_IR_JUMP: -+ write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); ++ write_sm4_jump(tpf, hlsl_ir_jump(instr)); + break; + + case HLSL_IR_LOAD: -+ write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); ++ write_sm4_load(tpf, hlsl_ir_load(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: -+ write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); ++ write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_RESOURCE_STORE: -+ write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); ++ write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); + break; + + case HLSL_IR_LOOP: -+ write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); ++ write_sm4_loop(tpf, 
hlsl_ir_loop(instr)); + break; + + case HLSL_IR_STORE: -+ write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); ++ write_sm4_store(tpf, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWIZZLE: -+ write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); ++ write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); + break; + + default: -+ hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); ++ hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + } + } +} @@ -27320,12 +36558,13 @@ index 00000000000..d066b13ee4e + const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) +{ + const struct hlsl_profile_info *profile = ctx->profile; -+ const struct hlsl_ir_var **extern_resources; + struct vkd3d_bytecode_buffer buffer = {0}; ++ struct extern_resource *extern_resources; + unsigned int extern_resources_count, i; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + size_t token_count_position; ++ struct tpf_writer tpf; + + static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = + { @@ -27340,6 +36579,8 @@ index 00000000000..d066b13ee4e + VKD3D_SM4_LIB, + }; + ++ tpf_writer_init(&tpf, ctx, &buffer); ++ + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + + put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); @@ -27348,45 +36589,42 @@ index 00000000000..d066b13ee4e + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) -+ write_sm4_dcl_constant_buffer(&buffer, cbuffer); ++ write_sm4_dcl_constant_buffer(&tpf, cbuffer); + } + + for (i = 0; i < extern_resources_count; ++i) + { -+ enum hlsl_regset regset; ++ const struct extern_resource *resource = &extern_resources[i]; + -+ var = extern_resources[i]; -+ regset = hlsl_type_get_regset(var->data_type); -+ -+ if (regset == HLSL_REGSET_SAMPLERS) -+ write_sm4_dcl_samplers(&buffer, 
var); -+ else if (regset == HLSL_REGSET_TEXTURES) -+ write_sm4_dcl_textures(ctx, &buffer, var, false); -+ else if (regset == HLSL_REGSET_UAVS) -+ write_sm4_dcl_textures(ctx, &buffer, var, true); ++ if (resource->regset == HLSL_REGSET_SAMPLERS) ++ write_sm4_dcl_samplers(&tpf, resource); ++ else if (resource->regset == HLSL_REGSET_TEXTURES) ++ write_sm4_dcl_textures(&tpf, resource, false); ++ else if (resource->regset == HLSL_REGSET_UAVS) ++ write_sm4_dcl_textures(&tpf, resource, true); + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) -+ write_sm4_dcl_semantic(ctx, &buffer, var); ++ write_sm4_dcl_semantic(&tpf, var); + } + + if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) -+ write_sm4_dcl_thread_group(&buffer, ctx->thread_count); ++ write_sm4_dcl_thread_group(&tpf, ctx->thread_count); + + if (ctx->temp_count) -+ write_sm4_dcl_temps(&buffer, ctx->temp_count); ++ write_sm4_dcl_temps(&tpf, ctx->temp_count); + -+ write_sm4_block(ctx, &buffer, &entry_func->body); ++ write_sm4_block(&tpf, &entry_func->body); + -+ write_sm4_ret(&buffer); ++ write_sm4_ret(&tpf); + + set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); + -+ add_section(dxbc, TAG_SHDR, &buffer); ++ add_section(ctx, dxbc, TAG_SHDR, &buffer); + -+ vkd3d_free(extern_resources); ++ sm4_free_extern_resources(extern_resources, extern_resources_count); +} + +int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) @@ -27409,7 +36647,7 @@ index 00000000000..d066b13ee4e + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index c9c15f01155..343fdb2252e 100644 +index c9c15f01155..0245d83a10b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -22,6 
+22,8 @@ @@ -27462,143 +36700,36 @@ index c9c15f01155..343fdb2252e 100644 return offset; } -@@ -1070,7 +1085,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - - if (TRACE_ON()) - { -- vkd3d_shader_trace(parser); -+ vkd3d_shader_trace(&parser->instructions, &parser->shader_version); - } - - for (i = 0; i < parser->instructions.count; ++i) -@@ -1167,75 +1182,73 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - return ret; +@@ -400,6 +415,8 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t + return "hlsl"; + case VKD3D_SHADER_SOURCE_D3D_BYTECODE: + return "d3dbc"; ++ case VKD3D_SHADER_SOURCE_DXBC_DXIL: ++ return "dxil"; + default: + FIXME("Unhandled source type %#x.\n", type); + return "bin"; +@@ -425,6 +442,18 @@ void vkd3d_shader_dump_shader(enum vkd3d_shader_source_type source_type, + shader_get_source_type_suffix(source_type), shader->code, shader->size); } --static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, -+static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; -+ struct vkd3d_glsl_generator *glsl_generator; - struct vkd3d_shader_compile_info scan_info; -- struct spirv_compiler *spirv_compiler; -- struct vkd3d_shader_parser *parser; - int ret; - -+ vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); -+ - scan_info = *compile_info; - scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; - scan_descriptor_info.next = scan_info.next; - scan_info.next = &scan_descriptor_info; - -- if ((ret = scan_dxbc(&scan_info, message_context)) < 0) -+ if ((ret = scan_with_parser(&scan_info, message_context, parser)) < 0) - return ret; - 
-- if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) -+ switch (compile_info->target_type) - { -- WARN("Failed to initialise shader parser.\n"); -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -- return ret; -- } -+ case VKD3D_SHADER_TARGET_D3D_ASM: -+ ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); -+ break; - -- vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); -+ case VKD3D_SHADER_TARGET_GLSL: -+ if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, -+ message_context, &parser->location))) -+ { -+ ERR("Failed to create GLSL generator.\n"); -+ vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -+ return VKD3D_ERROR; -+ } - -- if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) -- { -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -- ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); -- vkd3d_shader_parser_destroy(parser); -- return ret; -- } -+ ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); -+ vkd3d_glsl_generator_destroy(glsl_generator); -+ break; - -- if (compile_info->target_type == VKD3D_SHADER_TARGET_GLSL) -- { -- struct vkd3d_glsl_generator *glsl_generator; -+ case VKD3D_SHADER_TARGET_SPIRV_BINARY: -+ case VKD3D_SHADER_TARGET_SPIRV_TEXT: -+ ret = spirv_compile(parser, &scan_descriptor_info, compile_info, out, message_context); -+ break; - -- if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, -- message_context, &parser->location))) -- { -- ERR("Failed to create GLSL generator.\n"); -- vkd3d_shader_parser_destroy(parser); -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -- return VKD3D_ERROR; -- } -+ default: -+ /* Validation should prevent us from reaching this. 
*/ -+ assert(0); -+ } - -- ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); -+ vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -+ return ret; -+} - -- vkd3d_glsl_generator_destroy(glsl_generator); -- vkd3d_shader_parser_destroy(parser); -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -- return ret; -- } -+static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) ++static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) +{ -+ struct vkd3d_shader_parser *parser; -+ int ret; - -- if (!(spirv_compiler = spirv_compiler_create(&parser->shader_version, &parser->shader_desc, -- compile_info, &scan_descriptor_info, message_context, &parser->location))) -+ if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) - { -- ERR("Failed to create DXBC compiler.\n"); -- vkd3d_shader_parser_destroy(parser); -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -- return VKD3D_ERROR; -+ WARN("Failed to initialise shader parser.\n"); -+ return ret; - } - -- ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); -+ ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); - -- spirv_compiler_destroy(spirv_compiler); - vkd3d_shader_parser_destroy(parser); -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return ret; - } - -@@ -1270,7 +1283,7 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ - - if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) - { -- ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); -+ ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); - vkd3d_shader_parser_destroy(parser); - return ret; - } -@@ -1388,10 +1401,54 @@ void vkd3d_shader_free_root_signature(struct 
vkd3d_shader_versioned_root_signatu - desc->version = 0; ++ struct vkd3d_shader_scan_signature_info *signature_info; ++ ++ if ((signature_info = vkd3d_find_struct(info->next, SCAN_SIGNATURE_INFO))) ++ { ++ memset(&signature_info->input, 0, sizeof(signature_info->input)); ++ memset(&signature_info->output, 0, sizeof(signature_info->output)); ++ memset(&signature_info->patch_constant, 0, sizeof(signature_info->patch_constant)); ++ } ++} ++ + bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, + struct vkd3d_shader_message_context *message_context, const char *source_name, + const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, +@@ -511,9 +540,46 @@ void vkd3d_shader_free_messages(char *messages) + vkd3d_free(messages); } +static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, @@ -27638,6 +36769,646 @@ index c9c15f01155..343fdb2252e 100644 + return true; +} + + struct vkd3d_shader_scan_context + { +- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; ++ struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; + size_t descriptors_size; + + struct vkd3d_shader_message_context *message_context; +@@ -533,20 +599,12 @@ struct vkd3d_shader_scan_context + size_t cf_info_size; + size_t cf_info_count; + +- struct +- { +- unsigned int id; +- unsigned int descriptor_idx; +- } *uav_ranges; +- size_t uav_ranges_size; +- size_t uav_range_count; +- + enum vkd3d_shader_api_version api_version; + }; + + static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, ++ struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context) + { + unsigned int i; +@@ -569,7 +627,6 @@ static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *con + + static void 
vkd3d_shader_scan_context_cleanup(struct vkd3d_shader_scan_context *context) + { +- vkd3d_free(context->uav_ranges); + vkd3d_free(context->cf_info); + } + +@@ -637,18 +694,24 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_find_innermost_loop_cf_inf + return NULL; + } + +-static struct vkd3d_shader_descriptor_info *vkd3d_shader_scan_get_uav_descriptor_info( +- const struct vkd3d_shader_scan_context *context, unsigned int range_id) ++static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_context *context, ++ const struct vkd3d_shader_register *reg, uint32_t flag) + { ++ unsigned int range_id = reg->idx[0].offset; + unsigned int i; + +- for (i = 0; i < context->uav_range_count; ++i) ++ if (!context->scan_descriptor_info) ++ return; ++ ++ for (i = 0; i < context->scan_descriptor_info->descriptor_count; ++i) + { +- if (context->uav_ranges[i].id == range_id) +- return &context->scan_descriptor_info->descriptors[context->uav_ranges[i].descriptor_idx]; ++ if (context->scan_descriptor_info->descriptors[i].type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV ++ && context->scan_descriptor_info->descriptors[i].register_id == range_id) ++ { ++ context->scan_descriptor_info->descriptors[i].flags |= flag; ++ break; ++ } + } +- +- return NULL; + } + + static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) +@@ -664,13 +727,7 @@ static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instr + static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg) + { +- struct vkd3d_shader_descriptor_info *d; +- +- if (!context->scan_descriptor_info) +- return; +- +- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); +- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ; ++ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); + } + + static bool 
vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) +@@ -683,13 +740,7 @@ static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_in + static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg) + { +- struct vkd3d_shader_descriptor_info *d; +- +- if (!context->scan_descriptor_info) +- return; +- +- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); +- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER; ++ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER); + } + + static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) +@@ -702,93 +753,76 @@ static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_ + static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg) + { +- struct vkd3d_shader_descriptor_info *d; +- +- if (!context->scan_descriptor_info) +- return; +- +- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); +- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS; ++ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS); + } + +-static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, +- enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register_range *range, +- enum vkd3d_shader_resource_type resource_type, enum vkd3d_shader_resource_data_type resource_data_type, +- unsigned int flags) ++static struct vkd3d_shader_descriptor_info1 *vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, ++ enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register *reg, ++ const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, ++ 
enum vkd3d_shader_resource_data_type resource_data_type) + { +- struct vkd3d_shader_scan_descriptor_info *info = context->scan_descriptor_info; +- struct vkd3d_shader_descriptor_info *d; ++ struct vkd3d_shader_scan_descriptor_info1 *info = context->scan_descriptor_info; ++ struct vkd3d_shader_descriptor_info1 *d; + + if (!vkd3d_array_reserve((void **)&info->descriptors, &context->descriptors_size, + info->descriptor_count + 1, sizeof(*info->descriptors))) + { + ERR("Failed to allocate descriptor info.\n"); +- return false; ++ return NULL; + } + + d = &info->descriptors[info->descriptor_count]; ++ memset(d, 0, sizeof(*d)); + d->type = type; ++ d->register_id = reg->idx[0].offset; + d->register_space = range->space; + d->register_index = range->first; + d->resource_type = resource_type; + d->resource_data_type = resource_data_type; +- d->flags = flags; + d->count = (range->last == ~0u) ? ~0u : range->last - range->first + 1; + ++info->descriptor_count; + +- return true; +-} +- +-static bool vkd3d_shader_scan_add_uav_range(struct vkd3d_shader_scan_context *context, +- unsigned int id, unsigned int descriptor_idx) +-{ +- if (!vkd3d_array_reserve((void **)&context->uav_ranges, &context->uav_ranges_size, +- context->uav_range_count + 1, sizeof(*context->uav_ranges))) +- { +- ERR("Failed to allocate UAV range.\n"); +- return false; +- } +- +- context->uav_ranges[context->uav_range_count].id = id; +- context->uav_ranges[context->uav_range_count].descriptor_idx = descriptor_idx; +- ++context->uav_range_count; +- +- return true; ++ return d; + } + + static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_instruction *instruction) + { + const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; ++ struct vkd3d_shader_descriptor_info1 *d; + + if (!context->scan_descriptor_info) + return; + +- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->range, +- 
VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); ++ if (!(d = vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ++ &cb->src.reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) ++ return; ++ d->buffer_size = cb->size * 16; + } + + static void vkd3d_shader_scan_sampler_declaration(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_instruction *instruction) + { + const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; +- unsigned int flags; ++ struct vkd3d_shader_descriptor_info1 *d; + + if (!context->scan_descriptor_info) + return; + ++ if (!(d = vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, ++ &sampler->src.reg, &sampler->range, VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT))) ++ return; ++ + if (instruction->flags & VKD3DSI_SAMPLER_COMPARISON_MODE) +- flags = VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; +- else +- flags = 0; +- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->range, +- VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT, flags); ++ d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; + } + + static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, +- enum vkd3d_shader_resource_data_type resource_data_type) ++ enum vkd3d_shader_resource_data_type resource_data_type, ++ unsigned int sample_count, unsigned int structure_stride, bool raw) + { ++ struct vkd3d_shader_descriptor_info1 *d; + enum vkd3d_shader_descriptor_type type; + + if (!context->scan_descriptor_info) +@@ -798,10 +832,13 @@ static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_cont + type = VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; + else + type = VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; +- 
vkd3d_shader_scan_add_descriptor(context, type, &resource->range, resource_type, resource_data_type, 0); +- if (type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) +- vkd3d_shader_scan_add_uav_range(context, resource->reg.reg.idx[0].offset, +- context->scan_descriptor_info->descriptor_count - 1); ++ if (!(d = vkd3d_shader_scan_add_descriptor(context, type, &resource->reg.reg, ++ &resource->range, resource_type, resource_data_type))) ++ return; ++ d->sample_count = sample_count; ++ d->structure_stride = structure_stride; ++ if (raw) ++ d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER; + } + + static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_scan_context *context, +@@ -860,7 +897,7 @@ static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_sca + } + + vkd3d_shader_scan_resource_declaration(context, &semantic->resource, +- semantic->resource_type, resource_data_type); ++ semantic->resource_type, resource_data_type, semantic->sample_count, 0, false); + } + + static void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, +@@ -894,12 +931,13 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte + case VKD3DSIH_DCL_RESOURCE_RAW: + case VKD3DSIH_DCL_UAV_RAW: + vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.raw_resource.resource, +- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); ++ VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, 0, true); + break; + case VKD3DSIH_DCL_RESOURCE_STRUCTURED: + case VKD3DSIH_DCL_UAV_STRUCTURED: + vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.structured_resource.resource, +- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); ++ VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, ++ instruction->declaration.structured_resource.byte_stride, false); + break; + case VKD3DSIH_IF: + cf_info = vkd3d_shader_scan_push_cf_info(context); +@@ -1051,39 
+1089,120 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte + return VKD3D_OK; + } + ++static enum vkd3d_result convert_descriptor_info(struct vkd3d_shader_scan_descriptor_info *info, ++ const struct vkd3d_shader_scan_descriptor_info1 *info1) ++{ ++ unsigned int i; ++ ++ if (!(info->descriptors = vkd3d_calloc(info1->descriptor_count, sizeof(*info->descriptors)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ for (i = 0; i < info1->descriptor_count; ++i) ++ { ++ const struct vkd3d_shader_descriptor_info1 *src = &info1->descriptors[i]; ++ struct vkd3d_shader_descriptor_info *dst = &info->descriptors[i]; ++ ++ dst->type = src->type; ++ dst->register_space = src->register_space; ++ dst->register_index = src->register_index; ++ dst->resource_type = src->resource_type; ++ dst->resource_data_type = src->resource_data_type; ++ dst->flags = src->flags; ++ dst->count = src->count; ++ } ++ info->descriptor_count = info1->descriptor_count; ++ ++ return VKD3D_OK; ++} ++ ++static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info) ++{ ++ TRACE("scan_descriptor_info %p.\n", scan_descriptor_info); ++ ++ vkd3d_free(scan_descriptor_info->descriptors); ++} ++ + static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser *parser) ++ struct vkd3d_shader_message_context *message_context, ++ struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) + { +- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; ++ struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; ++ struct vkd3d_shader_scan_descriptor_info *descriptor_info; ++ struct vkd3d_shader_scan_signature_info *signature_info; + struct vkd3d_shader_instruction *instruction; + struct vkd3d_shader_scan_context context; + int ret = VKD3D_OK; + unsigned int i; + +- if 
((scan_descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO))) ++ descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO); ++ if (descriptor_info1) + { +- scan_descriptor_info->descriptors = NULL; +- scan_descriptor_info->descriptor_count = 0; ++ descriptor_info1->descriptors = NULL; ++ descriptor_info1->descriptor_count = 0; + } ++ else if (descriptor_info) ++ { ++ descriptor_info1 = &local_descriptor_info1; ++ } ++ signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO); + +- vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); ++ vkd3d_shader_scan_context_init(&context, compile_info, descriptor_info1, message_context); + + if (TRACE_ON()) + { +- vkd3d_shader_trace(parser); ++ vkd3d_shader_trace(&parser->instructions, &parser->shader_version); + } + + for (i = 0; i < parser->instructions.count; ++i) + { + instruction = &parser->instructions.elements[i]; + if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) +- { +- if (scan_descriptor_info) +- vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); + break; ++ } ++ ++ for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) ++ { ++ unsigned int size = parser->shader_desc.flat_constant_count[i].external; ++ struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; ++ struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; ++ struct vkd3d_shader_descriptor_info1 *d; ++ ++ if (parser->shader_desc.flat_constant_count[i].external) ++ { ++ if ((d = vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &reg, ++ &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) ++ d->buffer_size = size * 16; + } + } + ++ if (!ret && signature_info) ++ { ++ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) ++ ||
!vkd3d_shader_signature_from_shader_signature(&signature_info->output, ++ &parser->shader_desc.output_signature) ++ || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, ++ &parser->shader_desc.patch_constant_signature)) ++ { ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ } ++ ++ if (!ret && descriptor_info) ++ ret = convert_descriptor_info(descriptor_info, descriptor_info1); ++ ++ if (ret < 0) ++ { ++ if (descriptor_info) ++ vkd3d_shader_free_scan_descriptor_info(descriptor_info); ++ if (descriptor_info1) ++ vkd3d_shader_free_scan_descriptor_info1(descriptor_info1); ++ if (signature_info) ++ vkd3d_shader_free_scan_signature_info(signature_info); ++ } ++ else ++ { ++ vkd3d_shader_free_scan_descriptor_info1(&local_descriptor_info1); ++ } + vkd3d_shader_scan_context_cleanup(&context); + return ret; + } +@@ -1100,7 +1219,7 @@ static int scan_dxbc(const struct vkd3d_shader_compile_info *compile_info, + return ret; + } + +- ret = scan_with_parser(compile_info, message_context, parser); ++ ret = scan_with_parser(compile_info, message_context, NULL, parser); + vkd3d_shader_parser_destroy(parser); + + return ret; +@@ -1118,7 +1237,25 @@ static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, + return ret; + } + +- ret = scan_with_parser(compile_info, message_context, parser); ++ ret = scan_with_parser(compile_info, message_context, NULL, parser); ++ vkd3d_shader_parser_destroy(parser); ++ ++ return ret; ++} ++ ++static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ struct vkd3d_shader_parser *parser; ++ int ret; ++ ++ if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) ++ { ++ WARN("Failed to initialise shader parser.\n"); ++ return ret; ++ } ++ ++ ret = scan_with_parser(compile_info, message_context, NULL, parser); + vkd3d_shader_parser_destroy(parser); + + return ret; +@@ -1137,6 +1274,8 @@ int 
vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + if ((ret = vkd3d_shader_validate_compile_info(compile_info, false)) < 0) + return ret; + ++ init_scan_signature_info(compile_info); ++ + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); + + switch (compile_info->source_type) +@@ -1154,6 +1293,10 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + ret = scan_d3dbc(compile_info, &message_context); + break; + ++ case VKD3D_SHADER_SOURCE_DXBC_DXIL: ++ ret = scan_dxil(compile_info, &message_context); ++ break; ++ + default: + ERR("Unsupported source type %#x.\n", compile_info->source_type); + ret = VKD3D_ERROR_INVALID_ARGUMENT; +@@ -1167,75 +1310,70 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + return ret; + } + +-static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, ++static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, ++ const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { +- struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; ++ struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; ++ struct vkd3d_glsl_generator *glsl_generator; + struct vkd3d_shader_compile_info scan_info; +- struct spirv_compiler *spirv_compiler; +- struct vkd3d_shader_parser *parser; + int ret; + ++ vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); ++ + scan_info = *compile_info; +- scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; +- scan_descriptor_info.next = scan_info.next; +- scan_info.next = &scan_descriptor_info; + +- if ((ret = scan_dxbc(&scan_info, message_context)) < 0) ++ if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) + return ret; + +- if ((ret = 
vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) ++ switch (compile_info->target_type) + { +- WARN("Failed to initialise shader parser.\n"); +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); +- return ret; +- } ++ case VKD3D_SHADER_TARGET_D3D_ASM: ++ ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); ++ break; + +- vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); ++ case VKD3D_SHADER_TARGET_GLSL: ++ if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, ++ message_context, &parser->location))) ++ { ++ ERR("Failed to create GLSL generator.\n"); ++ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); ++ return VKD3D_ERROR; ++ } + +- if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) +- { +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); +- ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); +- vkd3d_shader_parser_destroy(parser); +- return ret; +- } ++ ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); ++ vkd3d_glsl_generator_destroy(glsl_generator); ++ break; + +- if (compile_info->target_type == VKD3D_SHADER_TARGET_GLSL) +- { +- struct vkd3d_glsl_generator *glsl_generator; ++ case VKD3D_SHADER_TARGET_SPIRV_BINARY: ++ case VKD3D_SHADER_TARGET_SPIRV_TEXT: ++ ret = spirv_compile(parser, &scan_descriptor_info, compile_info, out, message_context); ++ break; + +- if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, +- message_context, &parser->location))) +- { +- ERR("Failed to create GLSL generator.\n"); +- vkd3d_shader_parser_destroy(parser); +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); +- return VKD3D_ERROR; +- } ++ default: ++ /* Validation should prevent us from reaching this. 
*/ ++ assert(0); ++ } + +- ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); ++ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); ++ return ret; ++} + +- vkd3d_glsl_generator_destroy(glsl_generator); +- vkd3d_shader_parser_destroy(parser); +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); +- return ret; +- } ++static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) ++{ ++ struct vkd3d_shader_parser *parser; ++ int ret; + +- if (!(spirv_compiler = spirv_compiler_create(&parser->shader_version, &parser->shader_desc, +- compile_info, &scan_descriptor_info, message_context, &parser->location))) ++ if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) + { +- ERR("Failed to create DXBC compiler.\n"); +- vkd3d_shader_parser_destroy(parser); +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); +- return VKD3D_ERROR; ++ WARN("Failed to initialise shader parser.\n"); ++ return ret; + } + +- ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); ++ ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); + +- spirv_compiler_destroy(spirv_compiler); + vkd3d_shader_parser_destroy(parser); +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return ret; + } + +@@ -1270,7 +1408,7 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ + + if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) + { +- ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); ++ ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); + vkd3d_shader_parser_destroy(parser); + return ret; + } +@@ -1278,6 +1416,24 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ + return VKD3D_ERROR; + } + ++static int compile_dxbc_dxil(const 
struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) ++{ ++ struct vkd3d_shader_parser *parser; ++ int ret; ++ ++ if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) ++ { ++ WARN("Failed to initialise shader parser.\n"); ++ return ret; ++ } ++ ++ ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); ++ ++ vkd3d_shader_parser_destroy(parser); ++ return ret; ++} ++ + int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, char **messages) + { +@@ -1292,6 +1448,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + if ((ret = vkd3d_shader_validate_compile_info(compile_info, true)) < 0) + return ret; + ++ init_scan_signature_info(compile_info); ++ + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); + + switch (compile_info->source_type) +@@ -1308,6 +1466,10 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + ret = compile_d3d_bytecode(compile_info, out, &message_context); + break; + ++ case VKD3D_SHADER_SOURCE_DXBC_DXIL: ++ ret = compile_dxbc_dxil(compile_info, out, &message_context); ++ break; ++ + default: + vkd3d_unreachable(); + } +@@ -1326,6 +1488,15 @@ void vkd3d_shader_free_scan_descriptor_info(struct vkd3d_shader_scan_descriptor_ + vkd3d_free(scan_descriptor_info->descriptors); + } + ++void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info) ++{ ++ TRACE("info %p.\n", info); ++ ++ vkd3d_shader_free_shader_signature(&info->input); ++ vkd3d_shader_free_shader_signature(&info->output); ++ vkd3d_shader_free_shader_signature(&info->patch_constant); ++} ++ + void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *shader_code) + { + TRACE("shader_code %p.\n", shader_code); +@@ -1388,10 +1559,17 @@ void vkd3d_shader_free_root_signature(struct 
vkd3d_shader_versioned_root_signatu + desc->version = 0; + } + +void shader_signature_cleanup(struct shader_signature *signature) +{ + vkd3d_free(signature->elements); @@ -27652,7 +37423,7 @@ index c9c15f01155..343fdb2252e 100644 int ret; TRACE("dxbc {%p, %zu}, signature %p, messages %p.\n", dxbc->code, dxbc->size, signature, messages); -@@ -1400,13 +1457,17 @@ int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, +@@ -1400,13 +1578,17 @@ int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, *messages = NULL; vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); @@ -27671,7 +37442,37 @@ index c9c15f01155..343fdb2252e 100644 return ret; } -@@ -1642,6 +1703,84 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins +@@ -1465,6 +1647,9 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns + VKD3D_SHADER_SOURCE_DXBC_TPF, + VKD3D_SHADER_SOURCE_HLSL, + VKD3D_SHADER_SOURCE_D3D_BYTECODE, ++#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL ++ VKD3D_SHADER_SOURCE_DXBC_DXIL, ++#endif + }; + + TRACE("count %p.\n", count); +@@ -1503,6 +1688,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + + switch (source_type) + { ++#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL ++ case VKD3D_SHADER_SOURCE_DXBC_DXIL: ++#endif + case VKD3D_SHADER_SOURCE_DXBC_TPF: + *count = ARRAY_SIZE(dxbc_tpf_types); + return dxbc_tpf_types; +@@ -1588,9 +1776,6 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, + { + void *params; + +- if (!count) +- return NULL; +- + if (count > allocator->count - allocator->index) + { + struct vkd3d_shader_param_node *next = shader_param_allocator_node_create(allocator); +@@ -1642,6 +1827,84 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins return true; } @@ -27756,8 +37557,50 @@ index c9c15f01155..343fdb2252e 100644 void shader_instruction_array_destroy(struct 
vkd3d_shader_instruction_array *instructions) { unsigned int i; +@@ -1653,3 +1916,41 @@ void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *ins + vkd3d_free(instructions->icbs[i]); + vkd3d_free(instructions->icbs); + } ++ ++void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, ++ const struct vkd3d_shader_signature *input_signature, ++ unsigned int *ret_count, struct vkd3d_shader_varying_map *varyings) ++{ ++ unsigned int count = 0; ++ unsigned int i; ++ ++ TRACE("output_signature %p, input_signature %p, ret_count %p, varyings %p.\n", ++ output_signature, input_signature, ret_count, varyings); ++ ++ for (i = 0; i < input_signature->element_count; ++i) ++ { ++ const struct vkd3d_shader_signature_element *input_element, *output_element; ++ ++ input_element = &input_signature->elements[i]; ++ ++ if (input_element->sysval_semantic != VKD3D_SHADER_SV_NONE) ++ continue; ++ ++ varyings[count].input_register_index = input_element->register_index; ++ varyings[count].input_mask = input_element->mask; ++ ++ if ((output_element = vkd3d_shader_find_signature_element(output_signature, ++ input_element->semantic_name, input_element->semantic_index, 0))) ++ { ++ varyings[count].output_signature_index = output_element - output_signature->elements; ++ } ++ else ++ { ++ varyings[count].output_signature_index = output_signature->element_count; ++ } ++ ++ ++count; ++ } ++ ++ *ret_count = count; ++} diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 79be999adf9..406d53a3391 100644 +index 79be999adf9..eab1c730ae9 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -49,7 +49,7 @@ @@ -27769,7 +37612,7 @@ index 79be999adf9..406d53a3391 100644 #include #include -@@ -74,6 +74,13 @@ enum vkd3d_shader_error +@@ -74,6 +74,18 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF 
= 1000, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE = 1001, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY = 1002, @@ -27777,21 +37620,77 @@ index 79be999adf9..406d53a3391 100644 + VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS = 1004, + VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER = 1005, + VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL = 1006, ++ VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE = 1007, ++ VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION = 1008, ++ VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE = 1009, + + VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, + VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301, ++ VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK = 1302, ++ VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_SWIZZLE = 1303, VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND = 2000, VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE = 2001, -@@ -125,6 +132,7 @@ enum vkd3d_shader_error +@@ -81,6 +93,8 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_IDX_UNSUPPORTED = 2003, + VKD3D_SHADER_ERROR_SPV_STENCIL_EXPORT_UNSUPPORTED = 2004, + ++ VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE = 2300, ++ + VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY = 3000, + VKD3D_SHADER_ERROR_RS_INVALID_VERSION = 3001, + VKD3D_SHADER_ERROR_RS_INVALID_ROOT_PARAMETER_TYPE = 3002, +@@ -125,10 +139,15 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT = 5023, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, + VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, ++ VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, -@@ -223,6 +231,7 @@ enum vkd3d_shader_opcode + VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE = 5302, ++ VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, ++ VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, ++ VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305, + + VKD3D_SHADER_ERROR_GLSL_INTERNAL 
= 6000, + +@@ -137,8 +156,33 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE = 7002, + VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE = 7003, + VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, ++ VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, ++ VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, + + VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, ++ ++ VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY = 8000, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE = 8001, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET = 8002, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE = 8003, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE = 8004, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT = 8005, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE = 8006, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB = 8007, ++ VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT = 8008, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL = 8009, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID = 8010, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE = 8011, ++ VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND = 8012, ++ ++ VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER = 8300, ++ VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE = 8301, ++ VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH = 8302, ++ VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH = 8303, ++ VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS = 8304, ++ VKD3D_SHADER_WARNING_DXIL_UNHANDLED_INTRINSIC = 8305, ++ ++ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, + }; + + enum vkd3d_shader_opcode +@@ -223,6 +267,7 @@ enum vkd3d_shader_opcode VKD3DSIH_DEQ, VKD3DSIH_DFMA, VKD3DSIH_DGE, @@ -27799,7 +37698,32 @@ index 79be999adf9..406d53a3391 100644 VKD3DSIH_DIV, VKD3DSIH_DLT, VKD3DSIH_DMAX, -@@ -675,6 +684,7 @@ struct vkd3d_shader_register +@@ -477,6 +522,9 @@ enum vkd3d_shader_register_type + VKD3DSPR_DEPTHOUTLE, + VKD3DSPR_RASTERIZER, + VKD3DSPR_OUTSTENCILREF, ++ VKD3DSPR_UNDEF, ++ ++ VKD3DSPR_COUNT, + + VKD3DSPR_INVALID = ~0u, + }; +@@ -507,8 +555,14 @@ enum 
vkd3d_data_type + VKD3D_DATA_DOUBLE, + VKD3D_DATA_CONTINUED, + VKD3D_DATA_UNUSED, ++ VKD3D_DATA_UINT8, + }; + ++static inline bool data_type_is_integer(enum vkd3d_data_type data_type) ++{ ++ return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT; ++} ++ + enum vkd3d_immconst_type + { + VKD3D_IMMCONST_SCALAR, +@@ -675,6 +729,7 @@ struct vkd3d_shader_register bool non_uniform; enum vkd3d_data_type data_type; struct vkd3d_shader_register_index idx[3]; @@ -27807,10 +37731,22 @@ index 79be999adf9..406d53a3391 100644 enum vkd3d_immconst_type immconst_type; union { -@@ -774,13 +784,36 @@ enum vkd3d_shader_input_sysval_semantic +@@ -686,6 +741,9 @@ struct vkd3d_shader_register + } u; + }; + ++void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, ++ enum vkd3d_data_type data_type, unsigned int idx_count); ++ + struct vkd3d_shader_dst_param + { + struct vkd3d_shader_register reg; +@@ -774,13 +832,51 @@ enum vkd3d_shader_input_sysval_semantic VKD3D_SIV_LINE_DENSITY_TESS_FACTOR = 22, }; ++#define SIGNATURE_TARGET_LOCATION_UNUSED (~0u) ++ +struct signature_element +{ + unsigned int sort_index; @@ -27819,16 +37755,21 @@ index 79be999adf9..406d53a3391 100644 + unsigned int stream_index; + enum vkd3d_shader_sysval_semantic sysval_semantic; + enum vkd3d_shader_component_type component_type; ++ /* Register index in the source shader. */ + unsigned int register_index; + unsigned int register_count; + unsigned int mask; + unsigned int used_mask; + enum vkd3d_shader_minimum_precision min_precision; ++ /* Register index / location in the target shader. ++ * If SIGNATURE_TARGET_LOCATION_UNUSED, this element should not be written. 
*/ ++ unsigned int target_location; +}; + +struct shader_signature +{ + struct signature_element *elements; ++ size_t elements_capacity; + unsigned int element_count; +}; + @@ -27841,13 +37782,30 @@ index 79be999adf9..406d53a3391 100644 - struct vkd3d_shader_signature input_signature; - struct vkd3d_shader_signature output_signature; - struct vkd3d_shader_signature patch_constant_signature; ++ bool is_dxil; + struct shader_signature input_signature; + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; ++ ++ uint32_t temp_count; ++ ++ struct ++ { ++ uint32_t used, external; ++ } flat_constant_count[3]; }; struct vkd3d_shader_register_semantic -@@ -927,6 +960,11 @@ static inline bool vkd3d_shader_register_is_output(const struct vkd3d_shader_reg +@@ -912,6 +1008,8 @@ struct vkd3d_shader_instruction + } declaration; + }; + ++void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx); ++ + static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) + { + return ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w; +@@ -927,6 +1025,16 @@ static inline bool vkd3d_shader_register_is_output(const struct vkd3d_shader_reg return reg->type == VKD3DSPR_OUTPUT || reg->type == VKD3DSPR_COLOROUT; } @@ -27855,11 +37813,16 @@ index 79be999adf9..406d53a3391 100644 +{ + return reg->type == VKD3DSPR_PATCHCONST; +} ++ ++static inline bool register_is_constant(const struct vkd3d_shader_register *reg) ++{ ++ return (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_IMMCONST64); ++} + struct vkd3d_shader_location { const char *source_name; -@@ -981,6 +1019,8 @@ bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instru +@@ -981,6 +1089,8 @@ bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instru bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *instructions, unsigned 
int reserve); bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *instructions, struct vkd3d_shader_immediate_constant_buffer *icb); @@ -27868,7 +37831,7 @@ index 79be999adf9..406d53a3391 100644 void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); struct vkd3d_shader_parser -@@ -991,10 +1031,8 @@ struct vkd3d_shader_parser +@@ -991,10 +1101,8 @@ struct vkd3d_shader_parser struct vkd3d_shader_desc shader_desc; struct vkd3d_shader_version shader_version; @@ -27879,17 +37842,38 @@ index 79be999adf9..406d53a3391 100644 }; struct vkd3d_shader_parser_ops -@@ -1028,7 +1066,8 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse +@@ -1028,7 +1136,29 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse parser->ops->parser_destroy(parser); } -void vkd3d_shader_trace(struct vkd3d_shader_parser *parser); ++struct vkd3d_shader_descriptor_info1 ++{ ++ enum vkd3d_shader_descriptor_type type; ++ unsigned int register_space; ++ unsigned int register_index; ++ unsigned int register_id; ++ enum vkd3d_shader_resource_type resource_type; ++ enum vkd3d_shader_resource_data_type resource_data_type; ++ unsigned int flags; ++ unsigned int sample_count; ++ unsigned int buffer_size; ++ unsigned int structure_stride; ++ unsigned int count; ++}; ++ ++struct vkd3d_shader_scan_descriptor_info1 ++{ ++ struct vkd3d_shader_descriptor_info1 *descriptors; ++ unsigned int descriptor_count; ++}; ++ +void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version); const char *shader_get_type_prefix(enum vkd3d_shader_type type); -@@ -1044,8 +1083,9 @@ struct vkd3d_string_buffer_cache +@@ -1044,8 +1174,9 @@ struct vkd3d_string_buffer_cache size_t count, max_count, capacity; }; @@ -27901,7 +37885,7 @@ index 79be999adf9..406d53a3391 100644 void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); 
struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); -@@ -1067,6 +1107,8 @@ struct vkd3d_bytecode_buffer +@@ -1067,6 +1198,8 @@ struct vkd3d_bytecode_buffer int status; }; @@ -27910,7 +37894,12 @@ index 79be999adf9..406d53a3391 100644 size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size); void set_u32(struct vkd3d_bytecode_buffer *buffer, size_t offset, uint32_t value); -@@ -1128,8 +1170,10 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi +@@ -1125,11 +1258,15 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); + int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); ++int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); void free_shader_desc(struct vkd3d_shader_desc *desc); @@ -27922,7 +37911,7 @@ index 79be999adf9..406d53a3391 100644 struct vkd3d_glsl_generator; -@@ -1141,16 +1185,10 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); +@@ -1141,16 +1278,10 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); #define SPIRV_MAX_SRC_COUNT 6 @@ -27930,22 +37919,47 @@ index 79be999adf9..406d53a3391 100644 - -struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, - const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, -+int spirv_compile(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, +- const struct 
vkd3d_shader_scan_descriptor_info *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); -int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, - struct vkd3d_shader_code *spirv); -void spirv_compiler_destroy(struct spirv_compiler *compiler); ++int spirv_compile(struct vkd3d_shader_parser *parser, ++ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]); -@@ -1202,6 +1240,14 @@ static inline enum vkd3d_data_type vkd3d_data_type_from_component_type( +@@ -1202,6 +1333,38 @@ static inline enum vkd3d_data_type vkd3d_data_type_from_component_type( } } ++static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resource_data_type( ++ enum vkd3d_shader_resource_data_type data_type) ++{ ++ switch (data_type) ++ { ++ case VKD3D_SHADER_RESOURCE_DATA_FLOAT: ++ case VKD3D_SHADER_RESOURCE_DATA_UNORM: ++ case VKD3D_SHADER_RESOURCE_DATA_SNORM: ++ return VKD3D_SHADER_COMPONENT_FLOAT; ++ case VKD3D_SHADER_RESOURCE_DATA_UINT: ++ return VKD3D_SHADER_COMPONENT_UINT; ++ case VKD3D_SHADER_RESOURCE_DATA_INT: ++ return VKD3D_SHADER_COMPONENT_INT; ++ case VKD3D_SHADER_RESOURCE_DATA_DOUBLE: ++ case VKD3D_SHADER_RESOURCE_DATA_CONTINUED: ++ return VKD3D_SHADER_COMPONENT_DOUBLE; ++ default: ++ FIXME("Unhandled data type %#x.\n", data_type); ++ /* fall-through */ ++ case VKD3D_SHADER_RESOURCE_DATA_MIXED: ++ return VKD3D_SHADER_COMPONENT_UINT; ++ } ++} ++ +enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, + unsigned int index); + @@ -27957,23 +37971,294 @@ index 79be999adf9..406d53a3391 100644 static 
inline unsigned int vkd3d_write_mask_get_component_idx(DWORD write_mask) { unsigned int i; -@@ -1323,4 +1369,11 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void +@@ -1293,6 +1456,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, + } + + #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) ++#define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) + + #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') + #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') +@@ -1323,4 +1487,7 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void void dxbc_writer_init(struct dxbc_writer *dxbc); int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); -+enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *instructions); -+enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( -+ struct vkd3d_shader_instruction_array *instructions, const struct shader_signature *input_signature); -+enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, -+ enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, -+ struct shader_signature *output_signature, struct shader_signature *patch_constant_signature); ++enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, ++ const struct vkd3d_shader_compile_info *compile_info); + #endif /* __VKD3D_SHADER_PRIVATE_H */ diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 4e2d4295935..c5bd687bd69 100644 +index 4e2d4295935..42a98763438 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -1437,7 +1437,7 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( +@@ -26,6 +26,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF + static void 
d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value); + static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, + struct d3d12_fence *fence, uint64_t value); ++static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue); + static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); + static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any); + +@@ -453,9 +454,9 @@ static const struct d3d12_root_parameter *root_signature_get_root_descriptor( + } + + /* ID3D12Fence */ +-static struct d3d12_fence *impl_from_ID3D12Fence(ID3D12Fence *iface) ++static struct d3d12_fence *impl_from_ID3D12Fence1(ID3D12Fence1 *iface) + { +- return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence_iface); ++ return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence1_iface); + } + + static VkResult d3d12_fence_create_vk_fence(struct d3d12_fence *fence, VkFence *vk_fence) +@@ -899,18 +900,19 @@ static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uin + vkd3d_mutex_unlock(&fence->mutex); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence1 *iface, + REFIID riid, void **object) + { + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + +- if (IsEqualGUID(riid, &IID_ID3D12Fence) ++ if (IsEqualGUID(riid, &IID_ID3D12Fence1) ++ || IsEqualGUID(riid, &IID_ID3D12Fence) + || IsEqualGUID(riid, &IID_ID3D12Pageable) + || IsEqualGUID(riid, &IID_ID3D12DeviceChild) + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { +- ID3D12Fence_AddRef(iface); ++ ID3D12Fence1_AddRef(iface); + *object = iface; + return S_OK; + } +@@ -921,9 +923,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, + return E_NOINTERFACE; 
+ } + +-static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence *iface) ++static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence1 *iface) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + ULONG refcount = InterlockedIncrement(&fence->refcount); + + TRACE("%p increasing refcount to %u.\n", fence, refcount); +@@ -936,9 +938,9 @@ static void d3d12_fence_incref(struct d3d12_fence *fence) + InterlockedIncrement(&fence->internal_refcount); + } + +-static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence *iface) ++static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence1 *iface) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + ULONG refcount = InterlockedDecrement(&fence->refcount); + + TRACE("%p decreasing refcount to %u.\n", fence, refcount); +@@ -971,10 +973,10 @@ static void d3d12_fence_decref(struct d3d12_fence *fence) + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence1 *iface, + REFGUID guid, UINT *data_size, void *data) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", + iface, debugstr_guid(guid), data_size, data); +@@ -982,10 +984,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, + return vkd3d_get_private_data(&fence->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence1 *iface, + REFGUID guid, UINT data_size, const void *data) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + 
TRACE("iface %p, guid %s, data_size %u, data %p.\n", + iface, debugstr_guid(guid), data_size, data); +@@ -993,37 +995,37 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, + return vkd3d_set_private_data(&fence->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence1 *iface, + REFGUID guid, const IUnknown *data) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&fence->private_store, guid, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence *iface, const WCHAR *name) ++static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence1 *iface, const WCHAR *name) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, fence->device->wchar_size)); + + return name ? 
S_OK : E_INVALIDARG; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence *iface, REFIID iid, void **device) ++static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence1 *iface, REFIID iid, void **device) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(fence->device, iid, device); + } + +-static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface) ++static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence1 *iface) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + uint64_t completed_value; + + TRACE("iface %p.\n", iface); +@@ -1034,10 +1036,10 @@ static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface + return completed_value; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence1 *iface, + UINT64 value, HANDLE event) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + unsigned int i; + bool latch = false; + +@@ -1105,9 +1107,9 @@ static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fen + return d3d12_device_flush_blocked_queues(fence->device); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value) ++static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence1 *iface, UINT64 value) + { +- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); + + TRACE("iface %p, value %#"PRIx64".\n", iface, value); + +@@ -1116,7 +1118,16 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_fence_Signal(ID3D12Fence *iface, UINT64 v + return d3d12_fence_signal(fence, value, VK_NULL_HANDLE, true); + } + +-static const struct ID3D12FenceVtbl d3d12_fence_vtbl = ++static D3D12_FENCE_FLAGS STDMETHODCALLTYPE d3d12_fence_GetCreationFlags(ID3D12Fence1 *iface) ++{ ++ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); ++ ++ TRACE("iface %p.\n", iface); ++ ++ return fence->flags; ++} ++ ++static const struct ID3D12Fence1Vtbl d3d12_fence_vtbl = + { + /* IUnknown methods */ + d3d12_fence_QueryInterface, +@@ -1133,14 +1144,18 @@ static const struct ID3D12FenceVtbl d3d12_fence_vtbl = + d3d12_fence_GetCompletedValue, + d3d12_fence_SetEventOnCompletion, + d3d12_fence_Signal, ++ /* ID3D12Fence1 methods */ ++ d3d12_fence_GetCreationFlags, + }; + + static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) + { +- if (!iface) ++ ID3D12Fence1 *iface1; ++ ++ if (!(iface1 = (ID3D12Fence1 *)iface)) + return NULL; +- assert(iface->lpVtbl == &d3d12_fence_vtbl); +- return impl_from_ID3D12Fence(iface); ++ assert(iface1->lpVtbl == &d3d12_fence_vtbl); ++ return impl_from_ID3D12Fence1(iface1); + } + + static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device, +@@ -1150,7 +1165,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * + VkResult vr; + HRESULT hr; + +- fence->ID3D12Fence_iface.lpVtbl = &d3d12_fence_vtbl; ++ fence->ID3D12Fence1_iface.lpVtbl = &d3d12_fence_vtbl; + fence->internal_refcount = 1; + fence->refcount = 1; + +@@ -1161,7 +1176,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * + + vkd3d_cond_init(&fence->null_event_cond); + +- if (flags) ++ if ((fence->flags = flags)) + FIXME("Ignoring flags %#x.\n", flags); + + fence->events = NULL; +@@ -1315,32 +1330,26 @@ static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_comm + return hr; + } + +- allocator->current_command_list = list; +- +- return S_OK; +-} +- +-static void 
d3d12_command_allocator_free_command_buffer(struct d3d12_command_allocator *allocator, +- struct d3d12_command_list *list) +-{ +- struct d3d12_device *device = allocator->device; +- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; +- +- TRACE("allocator %p, list %p.\n", allocator, list); +- +- if (allocator->current_command_list == list) +- allocator->current_command_list = NULL; +- + if (!vkd3d_array_reserve((void **)&allocator->command_buffers, &allocator->command_buffers_size, + allocator->command_buffer_count + 1, sizeof(*allocator->command_buffers))) + { + WARN("Failed to add command buffer.\n"); + VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool, + 1, &list->vk_command_buffer)); +- return; ++ return E_OUTOFMEMORY; + } +- + allocator->command_buffers[allocator->command_buffer_count++] = list->vk_command_buffer; ++ ++ allocator->current_command_list = list; ++ ++ return S_OK; ++} ++ ++static void d3d12_command_allocator_remove_command_list(struct d3d12_command_allocator *allocator, ++ const struct d3d12_command_list *list) ++{ ++ if (allocator->current_command_list == list) ++ allocator->current_command_list = NULL; + } + + static bool d3d12_command_allocator_add_render_pass(struct d3d12_command_allocator *allocator, VkRenderPass pass) +@@ -1437,7 +1446,7 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( pool_desc.pNext = NULL; pool_desc.flags = 0; pool_desc.maxSets = 512; @@ -27982,16 +38267,228 @@ index 4e2d4295935..c5bd687bd69 100644 pool_desc.pPoolSizes = device->vk_pool_sizes; if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0) { -@@ -2463,6 +2463,8 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, +@@ -1910,10 +1919,32 @@ HRESULT d3d12_command_allocator_create(struct d3d12_device *device, + return S_OK; + } + ++static void d3d12_command_signature_incref(struct d3d12_command_signature *signature) ++{ ++ 
vkd3d_atomic_increment(&signature->internal_refcount); ++} ++ ++static void d3d12_command_signature_decref(struct d3d12_command_signature *signature) ++{ ++ unsigned int refcount = vkd3d_atomic_decrement(&signature->internal_refcount); ++ ++ if (!refcount) ++ { ++ struct d3d12_device *device = signature->device; ++ ++ vkd3d_private_store_destroy(&signature->private_store); ++ ++ vkd3d_free((void *)signature->desc.pArgumentDescs); ++ vkd3d_free(signature); ++ ++ d3d12_device_release(device); ++ } ++} ++ + /* ID3D12CommandList */ +-static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList2(ID3D12GraphicsCommandList2 *iface) ++static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList3(ID3D12GraphicsCommandList3 *iface) + { +- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); ++ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); + } + + static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list) +@@ -2259,12 +2290,13 @@ static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *l + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList2 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList3 *iface, + REFIID iid, void **object) + { + TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); + +- if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) ++ if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3) ++ || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) + || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList1) + || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList) + || IsEqualGUID(iid, &IID_ID3D12CommandList) +@@ -2272,7 +2304,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic + || IsEqualGUID(iid, &IID_ID3D12Object) + || IsEqualGUID(iid, 
&IID_IUnknown)) + { +- ID3D12GraphicsCommandList2_AddRef(iface); ++ ID3D12GraphicsCommandList3_AddRef(iface); + *object = iface; + return S_OK; + } +@@ -2283,9 +2315,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic + return E_NOINTERFACE; + } + +-static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList2 *iface) ++static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList3 *iface) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + ULONG refcount = InterlockedIncrement(&list->refcount); + + TRACE("%p increasing refcount to %u.\n", list, refcount); +@@ -2298,9 +2330,9 @@ static void vkd3d_pipeline_bindings_cleanup(struct vkd3d_pipeline_bindings *bind + vkd3d_free(bindings->vk_uav_counter_views); + } + +-static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList2 *iface) ++static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList3 *iface) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + ULONG refcount = InterlockedDecrement(&list->refcount); + + TRACE("%p decreasing refcount to %u.\n", list, refcount); +@@ -2313,7 +2345,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL + + /* When command pool is destroyed, all command buffers are implicitly freed. 
*/ + if (list->allocator) +- d3d12_command_allocator_free_command_buffer(list->allocator, list); ++ d3d12_command_allocator_remove_command_list(list->allocator, list); + + vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COMPUTE]); + vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS]); +@@ -2326,66 +2358,66 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL + return refcount; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList2 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList3 *iface, + REFGUID guid, UINT *data_size, void *data) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&list->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList2 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList3 *iface, + REFGUID guid, UINT data_size, const void *data) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&list->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList2 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList3 *iface, + REFGUID guid, const IUnknown *data) + { +- struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&list->private_store, guid, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList2 *iface, const WCHAR *name) ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList3 *iface, const WCHAR *name) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, list->device->wchar_size)); + + return name ? S_OK : E_INVALIDARG; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList2 *iface, REFIID iid, void **device) ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList3 *iface, REFIID iid, void **device) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(list->device, iid, device); + } + +-static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList2 *iface) ++static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList3 *iface) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p.\n", iface); + + return list->type; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList2 *iface) ++static HRESULT 
STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList3 *iface) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + VkResult vr; + +@@ -2411,7 +2443,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL + + if (list->allocator) + { +- d3d12_command_allocator_free_command_buffer(list->allocator, list); ++ d3d12_command_allocator_remove_command_list(list->allocator, list); + list->allocator = NULL; + } + +@@ -2429,7 +2461,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL + static void d3d12_command_list_reset_state(struct d3d12_command_list *list, + ID3D12PipelineState *initial_pipeline_state) + { +- ID3D12GraphicsCommandList2 *iface = &list->ID3D12GraphicsCommandList2_iface; ++ ID3D12GraphicsCommandList3 *iface = &list->ID3D12GraphicsCommandList3_iface; + + memset(list->strides, 0, sizeof(list->strides)); + list->primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; +@@ -2463,14 +2495,16 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, memset(list->so_counter_buffers, 0, sizeof(list->so_counter_buffers)); memset(list->so_counter_buffer_offsets, 0, sizeof(list->so_counter_buffer_offsets)); +- ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); + list->descriptor_heap_count = 0; + - ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); ++ ID3D12GraphicsCommandList3_SetPipelineState(iface, initial_pipeline_state); } -@@ -2720,28 +2722,31 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList2 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList3 *iface, + ID3D12CommandAllocator *allocator, 
ID3D12PipelineState *initial_pipeline_state) + { + struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + HRESULT hr; + + TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", +@@ -2497,7 +2531,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandL + return hr; + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList3 *iface, + ID3D12PipelineState *pipeline_state) + { + FIXME("iface %p, pipline_state %p stub!\n", iface, pipeline_state); +@@ -2720,28 +2754,31 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des unsigned int index, bool use_array) { uint32_t descriptor_range_magic = range->descriptor_magic; @@ -28028,7 +38525,7 @@ index 4e2d4295935..c5bd687bd69 100644 break; case VKD3D_DESCRIPTOR_MAGIC_SRV: -@@ -2752,8 +2757,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des +@@ -2752,8 +2789,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des * in pairs in one set. */ if (range->descriptor_count == UINT_MAX) { @@ -28039,7 +38536,7 @@ index 4e2d4295935..c5bd687bd69 100644 { vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; vk_descriptor_write->dstBinding = 0; -@@ -2763,21 +2768,21 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des +@@ -2763,21 +2800,21 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des { if (!use_array) vk_descriptor_write->dstBinding = vk_binding + 2 * index; @@ -28068,7 +38565,7 @@ index 4e2d4295935..c5bd687bd69 100644 ? 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; vk_descriptor_write->pImageInfo = vk_image_info; -@@ -2785,7 +2790,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des +@@ -2785,7 +2822,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break; case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: @@ -28077,7 +38574,7 @@ index 4e2d4295935..c5bd687bd69 100644 vk_image_info->imageView = VK_NULL_HANDLE; vk_image_info->imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; -@@ -2793,7 +2798,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des +@@ -2793,7 +2830,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break; default: @@ -28086,7 +38583,7 @@ index 4e2d4295935..c5bd687bd69 100644 return false; } -@@ -2847,6 +2852,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list +@@ -2847,6 +2884,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list for (j = 0; j < descriptor_count; ++j, ++descriptor) { unsigned int register_idx = range->base_register_idx + j; @@ -28098,7 +38595,7 @@ index 4e2d4295935..c5bd687bd69 100644 /* Track UAV counters. 
*/ if (range->descriptor_magic == VKD3D_DESCRIPTOR_MAGIC_UAV) -@@ -2856,8 +2866,6 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list +@@ -2856,8 +2898,6 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list if (state->uav_counters.bindings[k].register_space == range->register_space && state->uav_counters.bindings[k].register_index == register_idx) { @@ -28107,7 +38604,7 @@ index 4e2d4295935..c5bd687bd69 100644 if (bindings->vk_uav_counter_views[k] != vk_counter_view) bindings->uav_counters_dirty = true; bindings->vk_uav_counter_views[k] = vk_counter_view; -@@ -2867,7 +2875,7 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list +@@ -2867,7 +2907,7 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list } /* Not all descriptors are necessarily populated if the range is unbounded. */ @@ -28116,7 +38613,7 @@ index 4e2d4295935..c5bd687bd69 100644 continue; if (!vk_write_descriptor_set_from_d3d12_desc(current_descriptor_write, current_image_info, -@@ -3153,6 +3161,30 @@ static void d3d12_command_list_update_descriptor_tables(struct d3d12_command_lis +@@ -3153,6 +3193,47 @@ static void d3d12_command_list_update_descriptor_tables(struct d3d12_command_lis } } @@ -28143,11 +38640,28 @@ index 4e2d4295935..c5bd687bd69 100644 + vkd3d_mutex_unlock(&list->descriptor_heaps[i]->vk_sets_mutex); + } +} ++ ++static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap) ++{ ++ if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) ++ { ++ if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) ++ { ++ /* Descriptors can be written after binding. 
*/ ++ FIXME("Flushing descriptor updates while list %p is not closed.\n", list); ++ vkd3d_mutex_lock(&heap->vk_sets_mutex); ++ d3d12_desc_flush_vk_heap_updates_locked(heap, list->device); ++ vkd3d_mutex_unlock(&heap->vk_sets_mutex); ++ return; ++ } ++ list->descriptor_heaps[list->descriptor_heap_count++] = heap; ++ } ++} + static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) { -@@ -3177,10 +3209,18 @@ static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *l +@@ -3177,10 +3258,6 @@ static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *l bindings->sampler_heap_id = heap->serial_id; } @@ -28155,22 +38669,217 @@ index 4e2d4295935..c5bd687bd69 100644 - * be synchronised. On an experimental branch in which caching of Vk descriptor writes - * greatly increased the chance of multiple threads arriving here at the same time, - * GRID 2019 crashed without the mutex lock. */ -+ if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) -+ { -+ if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) -+ { -+ /* Descriptors can be written after binding. 
*/ -+ FIXME("Flushing descriptor updates while list %p is not closed.\n", list); -+ command_list_flush_vk_heap_updates(list); -+ list->descriptor_heap_count = 0; -+ } -+ list->descriptor_heaps[list->descriptor_heap_count++] = heap; -+ } -+ vkd3d_mutex_lock(&heap->vk_sets_mutex); for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) -@@ -3963,10 +4003,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo +@@ -3313,11 +3390,11 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList3 *iface, + UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location, + UINT start_instance_location) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " +@@ -3337,11 +3414,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCom + instance_count, start_vertex_location, start_instance_location)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList3 *iface, + UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, + INT base_vertex_location, UINT start_instance_location) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, index_count_per_instance %u, instance_count %u, 
start_vertex_location %u, " +@@ -3363,10 +3440,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12Grap + instance_count, start_vertex_location, base_vertex_location, start_instance_location)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList3 *iface, + UINT x, UINT y, UINT z) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); +@@ -3382,10 +3459,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandL + VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_vk_device_procs *vk_procs; + VkBufferCopy buffer_copy; +@@ -3584,7 +3661,7 @@ static HRESULT d3d12_command_list_allocate_transfer_buffer(struct d3d12_command_ + static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_command_list *list, + struct d3d12_resource *dst_resource, unsigned int dst_sub_resource_idx, + const struct vkd3d_format *dst_format, struct d3d12_resource *src_resource, +- unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format) ++ unsigned int src_sub_resource_idx, const struct vkd3d_format 
*src_format, unsigned int layer_count) + { + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const D3D12_RESOURCE_DESC *dst_desc = &dst_resource->desc; +@@ -3611,6 +3688,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com + buffer_image_copy.bufferImageHeight = 0; + vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, + src_format, src_sub_resource_idx, src_desc->MipLevels); ++ buffer_image_copy.imageSubresource.layerCount = layer_count; + src_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; + buffer_image_copy.imageOffset.x = 0; + buffer_image_copy.imageOffset.y = 0; +@@ -3618,7 +3696,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com + vk_extent_3d_from_d3d12_miplevel(&buffer_image_copy.imageExtent, src_desc, src_miplevel_idx); + + buffer_size = src_format->byte_count * buffer_image_copy.imageExtent.width * +- buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth; ++ buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth * layer_count; + if (FAILED(hr = d3d12_command_list_allocate_transfer_buffer(list, buffer_size, &transfer_buffer))) + { + ERR("Failed to allocate transfer buffer, hr %#x.\n", hr); +@@ -3644,6 +3722,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com + + vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, + dst_format, dst_sub_resource_idx, dst_desc->MipLevels); ++ buffer_image_copy.imageSubresource.layerCount = layer_count; + dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; + + assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == +@@ -3665,11 +3744,11 @@ static bool validate_d3d12_box(const D3D12_BOX *box) + && box->back > box->front; + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE 
d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList3 *iface, + const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, + const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_format *src_format, *dst_format; + const struct vkd3d_vk_device_procs *vk_procs; +@@ -3773,7 +3852,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic + { + d3d12_command_list_copy_incompatible_texture_region(list, + dst_resource, dst->u.SubresourceIndex, dst_format, +- src_resource, src->u.SubresourceIndex, src_format); ++ src_resource, src->u.SubresourceIndex, src_format, 1); + return; + } + +@@ -3790,11 +3869,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *dst, ID3D12Resource *src) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *dst_resource, *src_resource; ++ const struct vkd3d_format *dst_format, *src_format; + const struct vkd3d_vk_device_procs *vk_procs; + VkBufferCopy vk_buffer_copy; + VkImageCopy vk_image_copy; +@@ -3827,16 +3907,29 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm + else + { + layer_count = d3d12_resource_desc_get_layer_count(&dst_resource->desc); ++ dst_format = dst_resource->format; ++ src_format = src_resource->format; + + assert(d3d12_resource_is_texture(dst_resource)); + 
assert(d3d12_resource_is_texture(src_resource)); + assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); + assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); + ++ if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) ++ { ++ for (i = 0; i < dst_resource->desc.MipLevels; ++i) ++ { ++ d3d12_command_list_copy_incompatible_texture_region(list, ++ dst_resource, i, dst_format, ++ src_resource, i, src_format, layer_count); ++ } ++ return; ++ } ++ + for (i = 0; i < dst_resource->desc.MipLevels; ++i) + { + vk_image_copy_from_d3d12(&vk_image_copy, i, i, &src_resource->desc, &dst_resource->desc, +- src_resource->format, dst_resource->format, NULL, 0, 0, 0); ++ src_format, dst_format, NULL, 0, 0, 0); + vk_image_copy.dstSubresource.layerCount = layer_count; + vk_image_copy.srcSubresource.layerCount = layer_count; + VK_CALL(vkCmdCopyImage(list->vk_command_buffer, src_resource->u.vk_image, +@@ -3846,7 +3939,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, + const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, + D3D12_TILE_COPY_FLAGS flags) +@@ -3857,11 +3950,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommand + buffer, buffer_offset, flags); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *dst, UINT dst_sub_resource_idx, + ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) + { +- struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_format *src_format, *dst_format, *vk_format; + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_vk_device_procs *vk_procs; +@@ -3924,10 +4017,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList3 *iface, + D3D12_PRIMITIVE_TOPOLOGY topology) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, topology %#x.\n", iface, topology); + +@@ -3938,11 +4031,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12Gr + d3d12_command_list_invalidate_current_pipeline(list); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList3 *iface, + UINT viewport_count, const D3D12_VIEWPORT *viewports) + { + VkViewport vk_viewports[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i; + +@@ -3963,10 +4056,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo vk_viewports[i].minDepth = viewports[i].MinDepth; vk_viewports[i].maxDepth = viewports[i].MaxDepth; @@ -28186,7 +38895,216 @@ index 4e2d4295935..c5bd687bd69 100644 } } -@@ -4481,11 +4523,20 @@ 
static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, +@@ -3974,10 +4069,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo + VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, viewport_count, vk_viewports)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList3 *iface, + UINT rect_count, const D3D12_RECT *rects) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + VkRect2D vk_rects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i; +@@ -4002,10 +4097,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12Graphic + VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, rect_count, vk_rects)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList3 *iface, + const FLOAT blend_factor[4]) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); +@@ -4014,10 +4109,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12Graphics + VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, blend_factor)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList3 *iface, + UINT stencil_ref) + { +- struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); +@@ -4026,11 +4121,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsC + VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, VK_STENCIL_FRONT_AND_BACK, stencil_ref)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList3 *iface, + ID3D12PipelineState *pipeline_state) + { + struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); + +@@ -4081,10 +4176,10 @@ static unsigned int d3d12_find_ds_multiplanar_transition(const D3D12_RESOURCE_BA + return 0; + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList3 *iface, + UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + bool have_aliasing_barriers = false, have_split_barriers = false; + const struct vkd3d_vk_device_procs *vk_procs; + const struct vkd3d_vulkan_info *vk_info; +@@ -4307,13 +4402,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC + WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); + } + +-static void STDMETHODCALLTYPE 
d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList3 *iface, + ID3D12GraphicsCommandList *command_list) + { + FIXME("iface %p, command_list %p stub!\n", iface, command_list); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList3 *iface, + UINT heap_count, ID3D12DescriptorHeap *const *heaps) + { + TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); +@@ -4339,10 +4434,10 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis + d3d12_command_list_invalidate_root_parameters(list, bind_point); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList3 *iface, + ID3D12RootSignature *root_signature) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_signature %p.\n", iface, root_signature); + +@@ -4350,10 +4445,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12G + unsafe_impl_from_ID3D12RootSignature(root_signature)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList3 *iface, + ID3D12RootSignature *root_signature) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_signature %p.\n", iface, root_signature); + +@@ -4366,6 +4461,7 @@ 
static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l + { + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct d3d12_root_signature *root_signature = bindings->root_signature; ++ struct d3d12_descriptor_heap *descriptor_heap; + struct d3d12_desc *desc; + + assert(root_signature_get_descriptor_table(root_signature, index)); +@@ -4376,15 +4472,25 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l + if (bindings->descriptor_tables[index] == desc) + return; + ++ descriptor_heap = d3d12_desc_get_descriptor_heap(desc); ++ if (!(descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) ++ { ++ /* GetGPUDescriptorHandleForHeapStart() returns a null handle in this case, ++ * but a CPU handle could be passed. */ ++ WARN("Descriptor heap %p is not shader visible.\n", descriptor_heap); ++ return; ++ } ++ command_list_add_descriptor_heap(list, descriptor_heap); ++ + bindings->descriptor_tables[index] = desc; + bindings->descriptor_table_dirty_mask |= (uint64_t)1 << index; + bindings->descriptor_table_active_mask |= (uint64_t)1 << index; + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, + UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", + iface, root_parameter_index, base_descriptor.ptr); +@@ -4393,10 +4499,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(I + root_parameter_index, base_descriptor); + } + +-static void STDMETHODCALLTYPE 
d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, + UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", + iface, root_parameter_index, base_descriptor.ptr); +@@ -4418,10 +4524,10 @@ static void d3d12_command_list_set_root_constants(struct d3d12_command_list *lis + c->stage_flags, c->offset + offset * sizeof(uint32_t), count * sizeof(uint32_t), data)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, + UINT root_parameter_index, UINT data, UINT dst_offset) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", + iface, root_parameter_index, data, dst_offset); +@@ -4430,10 +4536,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3 + root_parameter_index, dst_offset, 1, &data); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, + UINT root_parameter_index, UINT data, UINT dst_offset) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, 
root_parameter_index %u, data 0x%08x, dst_offset %u.\n", + iface, root_parameter_index, data, dst_offset); +@@ -4442,10 +4548,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID + root_parameter_index, dst_offset, 1, &data); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, + UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", + iface, root_parameter_index, constant_count, data, dst_offset); +@@ -4454,10 +4560,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID + root_parameter_index, dst_offset, constant_count, data); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, + UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", + iface, root_parameter_index, constant_count, data, dst_offset); +@@ -4481,11 +4587,20 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, root_parameter = root_signature_get_root_descriptor(root_signature, index); assert(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); @@ -28212,7 +39130,31 @@ 
index 4e2d4295935..c5bd687bd69 100644 if (vk_info->KHR_push_descriptor) { -@@ -4547,13 +4598,13 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li +@@ -4510,9 +4625,9 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( +- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4521,9 +4636,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferVie + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( +- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4547,13 +4662,13 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li assert(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); /* FIXME: Re-use buffer views. 
*/ @@ -28228,7 +39170,68 @@ index 4e2d4295935..c5bd687bd69 100644 { ERR("Failed to add buffer view.\n"); VK_CALL(vkDestroyBufferView(vk_device, vk_buffer_view, NULL)); -@@ -4644,6 +4695,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics +@@ -4582,9 +4697,9 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( +- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4594,9 +4709,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceVie + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( +- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4606,9 +4721,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceVi + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView( +- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, 
D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4618,9 +4733,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessVi + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView( +- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4629,10 +4744,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessV + root_parameter_index, address); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList3 *iface, + const D3D12_INDEX_BUFFER_VIEW *view) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_vk_device_procs *vk_procs; + struct d3d12_resource *resource; + enum VkIndexType index_type; +@@ -4644,6 +4759,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics WARN("Ignoring NULL index buffer view.\n"); return; } @@ -28240,7 +39243,47 @@ index 4e2d4295935..c5bd687bd69 100644 vk_procs = &list->device->vk_procs; -@@ -4844,7 +4900,7 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_OMSetRenderTargets(ID3D12Graphi +@@ -4667,10 +4787,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics + view->BufferLocation - resource->gpu_address, index_type)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList3 *iface, + UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct vkd3d_null_resources *null_resources; + struct vkd3d_gpu_va_allocator *gpu_va_allocator; + VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; +@@ -4725,10 +4845,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi + d3d12_command_list_invalidate_current_pipeline(list); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList3 *iface, + UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; + VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; + VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; +@@ -4790,11 +4910,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsComm + VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE 
d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList3 *iface, + UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, + BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct d3d12_rtv_desc *rtv_desc; + const struct d3d12_dsv_desc *dsv_desc; + VkFormat prev_dsv_format; +@@ -4844,7 +4964,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi WARN("Failed to add view.\n"); } @@ -28249,7 +39292,7 @@ index 4e2d4295935..c5bd687bd69 100644 list->fb_width = max(list->fb_width, rtv_desc->width); list->fb_height = max(list->fb_height, rtv_desc->height); list->fb_layer_count = max(list->fb_layer_count, rtv_desc->layer_count); -@@ -4868,7 +4924,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi +@@ -4868,7 +4988,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi list->dsv = VK_NULL_HANDLE; } @@ -28258,7 +39301,7 @@ index 4e2d4295935..c5bd687bd69 100644 list->fb_width = max(list->fb_width, dsv_desc->width); list->fb_height = max(list->fb_height, dsv_desc->height); list->fb_layer_count = max(list->fb_layer_count, dsv_desc->layer_count); -@@ -4960,7 +5016,7 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, +@@ -4960,7 +5080,7 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, fb_desc.flags = 0; fb_desc.renderPass = vk_render_pass; fb_desc.attachmentCount = 1; @@ -28267,7 +39310,35 @@ index 4e2d4295935..c5bd687bd69 100644 fb_desc.width = width; fb_desc.height = height; fb_desc.layers = layer_count; -@@ -5163,13 +5219,14 @@ static void vkd3d_uav_clear_state_get_image_pipeline(const struct vkd3d_uav_clea +@@ -4995,12 +5115,12 @@ static void 
d3d12_command_list_clear(struct d3d12_command_list *list, + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList3 *iface, + D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, + UINT rect_count, const D3D12_RECT *rects) + { + const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv); + struct VkAttachmentDescription attachment_desc; + struct VkAttachmentReference ds_reference; +@@ -5044,10 +5164,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12Gra + &clear_value, rect_count, rects); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList3 *iface, + D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const struct d3d12_rtv_desc *rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv); + struct VkAttachmentDescription attachment_desc; + struct VkAttachmentReference color_reference; +@@ -5163,13 +5283,14 @@ static void vkd3d_uav_clear_state_get_image_pipeline(const struct vkd3d_uav_clea } static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, @@ -28283,7 +39354,7 @@ index 4e2d4295935..c5bd687bd69 100644 VkDescriptorImageInfo image_info; D3D12_RECT full_rect, curr_rect; VkWriteDescriptorSet write_set; -@@ -5181,8 
+5238,9 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, +@@ -5181,8 +5302,9 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, d3d12_command_list_invalidate_bindings(list, list->state); d3d12_command_list_invalidate_root_parameters(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); @@ -28294,10 +39365,17 @@ index 4e2d4295935..c5bd687bd69 100644 clear_args.colour = *clear_colour; -@@ -5295,10 +5353,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID +@@ -5290,15 +5412,16 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList3 *iface, + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, const UINT values[4], UINT rect_count, const D3D12_RECT *rects) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct vkd3d_view *descriptor, *uint_view = NULL; struct d3d12_device *device = list->device; - struct vkd3d_view *view, *uint_view = NULL; @@ -28307,7 +39385,7 @@ index 4e2d4295935..c5bd687bd69 100644 struct d3d12_resource *resource_impl; VkClearColorValue colour; -@@ -5306,7 +5365,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID +@@ -5306,7 +5429,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); resource_impl = unsafe_impl_from_ID3D12Resource(resource); @@ -28318,7 +39396,7 @@ index 4e2d4295935..c5bd687bd69 100644 memcpy(colour.uint32, 
values, sizeof(colour.uint32)); if (view->format->type != VKD3D_FORMAT_TYPE_UINT) -@@ -5320,8 +5381,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID +@@ -5320,8 +5445,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID if (d3d12_resource_is_buffer(resource_impl)) { @@ -28329,7 +39407,7 @@ index 4e2d4295935..c5bd687bd69 100644 { ERR("Failed to create buffer view.\n"); return; -@@ -5337,16 +5398,17 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID +@@ -5337,26 +5462,27 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID view_desc.layer_idx = view->info.texture.layer_idx; view_desc.layer_count = view->info.texture.layer_count; @@ -28350,7 +39428,19 @@ index 4e2d4295935..c5bd687bd69 100644 if (uint_view) vkd3d_view_decref(uint_view, device); -@@ -5365,7 +5427,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList3 *iface, + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, + const float values[4], UINT rect_count, const D3D12_RECT *rects) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *resource_impl; + VkClearColorValue colour; + struct vkd3d_view *view; +@@ -5365,22 +5491,23 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); resource_impl = unsafe_impl_from_ID3D12Resource(resource); @@ -28360,7 +39450,231 @@ index 4e2d4295935..c5bd687bd69 100644 memcpy(colour.float32, 
values, sizeof(colour.float32)); d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); -@@ -5906,6 +5969,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d + } + +-static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *resource, const D3D12_DISCARD_REGION *region) + { + FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList3 *iface, + ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); + const struct vkd3d_vk_device_procs *vk_procs; + VkQueryControlFlags flags = 0; +@@ -5407,10 +5534,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsComman + VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList3 *iface, + ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); + const struct vkd3d_vk_device_procs *vk_procs; + +@@ -5452,12 +5579,12 @@ static size_t get_query_stride(D3D12_QUERY_TYPE 
type) + return sizeof(uint64_t); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList3 *iface, + ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, + ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) + { + const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer); + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i, first, count; +@@ -5533,10 +5660,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12Graphics + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer); + const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; + const struct vkd3d_vk_device_procs *vk_procs; +@@ -5605,19 +5732,19 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCo + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList3 *iface, + UINT metadata, const void *data, UINT size) + { + FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, 
metadata, data, size); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList3 *iface, + UINT metadata, const void *data, UINT size) + { + FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList2 *iface) ++static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList3 *iface) + { + FIXME("iface %p stub!\n", iface); + } +@@ -5626,14 +5753,14 @@ STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMEN + STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); + STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS)); + +-static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList3 *iface, + ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, + UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) + { + struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature); + struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer); + struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + const D3D12_COMMAND_SIGNATURE_DESC *signature_desc; + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i; +@@ -5651,6 +5778,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC + return; + } + ++ d3d12_command_signature_incref(sig_impl); ++ 
+ signature_desc = &sig_impl->desc; + for (i = 0; i < signature_desc->NumArgumentDescs; ++i) + { +@@ -5713,6 +5842,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC + if (!d3d12_command_list_update_compute_state(list)) + { + WARN("Failed to update compute state, ignoring dispatch.\n"); ++ d3d12_command_signature_decref(sig_impl); + return; + } + +@@ -5725,9 +5855,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC + break; + } + } ++ ++ d3d12_command_signature_decref(sig_impl); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *dst_buffer, UINT64 dst_offset, + ID3D12Resource *src_buffer, UINT64 src_offset, + UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, +@@ -5740,7 +5872,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12Grap + dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *dst_buffer, UINT64 dst_offset, + ID3D12Resource *src_buffer, UINT64 src_offset, + UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, +@@ -5753,20 +5885,20 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr + dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList3 *iface, + FLOAT min, FLOAT max) + { + FIXME("iface %p, min %.8e, max %.8e 
stub!\n", iface, min, max); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList3 *iface, + UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) + { + FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", + iface, sample_count, pixel_count, sample_positions); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList3 *iface, + ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, + ID3D12Resource *src_resource, UINT src_sub_resource_idx, + D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) +@@ -5778,16 +5910,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12 + src_resource, src_sub_resource_idx, src_rect, format, mode); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList2 *iface, UINT mask) ++static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList3 *iface, UINT mask) + { + FIXME("iface %p, mask %#x stub!\n", iface, mask); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList2 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList3 *iface, + UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters, + const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); + struct d3d12_resource *resource; + unsigned int i; + +@@ -5800,7 +5932,13 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_WriteBufferImmediate(ID3D12Grap + } + } + +-static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = ++static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList3 *iface, ++ ID3D12ProtectedResourceSession *protected_session) ++{ ++ FIXME("iface %p, protected_session %p stub!\n", iface, protected_session); ++} ++ ++static const struct ID3D12GraphicsCommandList3Vtbl d3d12_command_list_vtbl = + { + /* IUnknown methods */ + d3d12_command_list_QueryInterface, +@@ -5876,6 +6014,8 @@ static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = + d3d12_command_list_SetViewInstanceMask, + /* ID3D12GraphicsCommandList2 methods */ + d3d12_command_list_WriteBufferImmediate, ++ /* ID3D12GraphicsCommandList3 methods */ ++ d3d12_command_list_SetProtectedResourceSession, + }; + + static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface) +@@ -5883,7 +6023,7 @@ static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12Comma + if (!iface) + return NULL; + assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); +- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); ++ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); + } + + static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device, +@@ -5892,7 +6032,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d + { + HRESULT hr; + +- list->ID3D12GraphicsCommandList2_iface.lpVtbl = &d3d12_command_list_vtbl; ++ list->ID3D12GraphicsCommandList3_iface.lpVtbl = &d3d12_command_list_vtbl; + list->refcount = 1; + + list->type = type; +@@ -5906,6 +6046,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d list->update_descriptors = device->use_vk_heaps ? 
d3d12_command_list_update_heap_descriptors : d3d12_command_list_update_descriptors; @@ -28368,7 +39682,223 @@ index 4e2d4295935..c5bd687bd69 100644 if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) { -@@ -6199,6 +6263,8 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm +@@ -5999,8 +6140,35 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if + return refcount; + } + ++static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) ++{ ++ switch (op->opcode) ++ { ++ case VKD3D_CS_OP_WAIT: ++ d3d12_fence_decref(op->u.wait.fence); ++ break; ++ ++ case VKD3D_CS_OP_SIGNAL: ++ d3d12_fence_decref(op->u.signal.fence); ++ break; ++ ++ case VKD3D_CS_OP_EXECUTE: ++ vkd3d_free(op->u.execute.buffers); ++ break; ++ ++ case VKD3D_CS_OP_UPDATE_MAPPINGS: ++ case VKD3D_CS_OP_COPY_MAPPINGS: ++ break; ++ } ++} ++ + static void d3d12_command_queue_op_array_destroy(struct d3d12_command_queue_op_array *array) + { ++ unsigned int i; ++ ++ for (i = 0; i < array->count; ++i) ++ d3d12_command_queue_destroy_op(&array->ops[i]); ++ + vkd3d_free(array->ops); + } + +@@ -6098,17 +6266,131 @@ static struct vkd3d_cs_op_data *d3d12_command_queue_op_array_require_space(struc + return &array->ops[array->count++]; + } + ++static bool clone_array_parameter(void **dst, const void *src, size_t elem_size, unsigned int count) ++{ ++ void *buffer; ++ ++ *dst = NULL; ++ if (src) ++ { ++ if (!(buffer = vkd3d_calloc(count, elem_size))) ++ return false; ++ memcpy(buffer, src, count * elem_size); ++ *dst = buffer; ++ } ++ return true; ++} ++ ++static void update_mappings_cleanup(struct vkd3d_cs_update_mappings *update_mappings) ++{ ++ vkd3d_free(update_mappings->region_start_coordinates); ++ vkd3d_free(update_mappings->region_sizes); ++ vkd3d_free(update_mappings->range_flags); ++ vkd3d_free(update_mappings->heap_range_offsets); ++ vkd3d_free(update_mappings->range_tile_counts); ++} ++ + static void 
STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, + ID3D12Resource *resource, UINT region_count, + const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, const D3D12_TILE_REGION_SIZE *region_sizes, + ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, + const UINT *heap_range_offsets, const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) + { +- FIXME("iface %p, resource %p, region_count %u, region_start_coordinates %p, " ++ struct d3d12_resource *resource_impl = unsafe_impl_from_ID3D12Resource(resource); ++ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ++ struct d3d12_heap *heap_impl = unsafe_impl_from_ID3D12Heap(heap); ++ struct vkd3d_cs_update_mappings update_mappings = {0}; ++ struct vkd3d_cs_op_data *op; ++ ++ TRACE("iface %p, resource %p, region_count %u, region_start_coordinates %p, " + "region_sizes %p, heap %p, range_count %u, range_flags %p, heap_range_offsets %p, " +- "range_tile_counts %p, flags %#x stub!\n", ++ "range_tile_counts %p, flags %#x.\n", + iface, resource, region_count, region_start_coordinates, region_sizes, heap, range_count, + range_flags, heap_range_offsets, range_tile_counts, flags); ++ ++ if (!region_count || !range_count) ++ return; ++ ++ if (!command_queue->supports_sparse_binding) ++ { ++ FIXME("Command queue %p does not support sparse binding.\n", command_queue); ++ return; ++ } ++ ++ if (!resource_impl->tiles.subresource_count) ++ { ++ WARN("Resource %p is not a tiled resource.\n", resource_impl); ++ return; ++ } ++ ++ if (region_count > 1 && !region_start_coordinates) ++ { ++ WARN("Region start coordinates must not be NULL when region count is > 1.\n"); ++ return; ++ } ++ ++ if (range_count > 1 && !range_tile_counts) ++ { ++ WARN("Range tile counts must not be NULL when range count is > 1.\n"); ++ return; ++ } ++ ++ update_mappings.resource = resource_impl; ++ update_mappings.heap = heap_impl; ++ if 
(!clone_array_parameter((void **)&update_mappings.region_start_coordinates, ++ region_start_coordinates, sizeof(*region_start_coordinates), region_count)) ++ { ++ ERR("Failed to allocate region start coordinates.\n"); ++ return; ++ } ++ if (!clone_array_parameter((void **)&update_mappings.region_sizes, ++ region_sizes, sizeof(*region_sizes), region_count)) ++ { ++ ERR("Failed to allocate region sizes.\n"); ++ goto free_clones; ++ } ++ if (!clone_array_parameter((void **)&update_mappings.range_flags, ++ range_flags, sizeof(*range_flags), range_count)) ++ { ++ ERR("Failed to allocate range flags.\n"); ++ goto free_clones; ++ } ++ if (!clone_array_parameter((void **)&update_mappings.heap_range_offsets, ++ heap_range_offsets, sizeof(*heap_range_offsets), range_count)) ++ { ++ ERR("Failed to allocate heap range offsets.\n"); ++ goto free_clones; ++ } ++ if (!clone_array_parameter((void **)&update_mappings.range_tile_counts, ++ range_tile_counts, sizeof(*range_tile_counts), range_count)) ++ { ++ ERR("Failed to allocate range tile counts.\n"); ++ goto free_clones; ++ } ++ update_mappings.region_count = region_count; ++ update_mappings.range_count = range_count; ++ update_mappings.flags = flags; ++ ++ vkd3d_mutex_lock(&command_queue->op_mutex); ++ ++ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) ++ { ++ ERR("Failed to add op.\n"); ++ goto unlock_mutex; ++ } ++ ++ op->opcode = VKD3D_CS_OP_UPDATE_MAPPINGS; ++ op->u.update_mappings = update_mappings; ++ ++ d3d12_command_queue_submit_locked(command_queue); ++ ++ vkd3d_mutex_unlock(&command_queue->op_mutex); ++ return; ++ ++unlock_mutex: ++ vkd3d_mutex_unlock(&command_queue->op_mutex); ++free_clones: ++ update_mappings_cleanup(&update_mappings); + } + + static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, +@@ -6119,10 +6401,34 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command + const D3D12_TILE_REGION_SIZE 
*region_size, + D3D12_TILE_MAPPING_FLAGS flags) + { +- FIXME("iface %p, dst_resource %p, dst_region_start_coordinate %p, " +- "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x stub!\n", ++ struct d3d12_resource *dst_resource_impl = impl_from_ID3D12Resource(dst_resource); ++ struct d3d12_resource *src_resource_impl = impl_from_ID3D12Resource(src_resource); ++ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); ++ struct vkd3d_cs_op_data *op; ++ ++ TRACE("iface %p, dst_resource %p, dst_region_start_coordinate %p, " ++ "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x.\n", + iface, dst_resource, dst_region_start_coordinate, src_resource, + src_region_start_coordinate, region_size, flags); ++ ++ vkd3d_mutex_lock(&command_queue->op_mutex); ++ ++ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) ++ { ++ ERR("Failed to add op.\n"); ++ return; ++ } ++ op->opcode = VKD3D_CS_OP_COPY_MAPPINGS; ++ op->u.copy_mappings.dst_resource = dst_resource_impl; ++ op->u.copy_mappings.src_resource = src_resource_impl; ++ op->u.copy_mappings.dst_region_start_coordinate = *dst_region_start_coordinate; ++ op->u.copy_mappings.src_region_start_coordinate = *src_region_start_coordinate; ++ op->u.copy_mappings.region_size = *region_size; ++ op->u.copy_mappings.flags = flags; ++ ++ d3d12_command_queue_submit_locked(command_queue); ++ ++ vkd3d_mutex_unlock(&command_queue->op_mutex); + } + + static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, +@@ -6150,8 +6456,6 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu + ERR("Failed to submit queue(s), vr %d.\n", vr); + + vkd3d_queue_release(vkd3d_queue); +- +- vkd3d_free(buffers); + } + + static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue) +@@ -6199,6 +6503,8 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm return; } 
@@ -28377,8 +39907,110 @@ index 4e2d4295935..c5bd687bd69 100644 buffers[i] = cmd_list->vk_command_buffer; } +@@ -6207,7 +6513,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm + if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { + ERR("Failed to add op.\n"); +- return; ++ goto done; + } + op->opcode = VKD3D_CS_OP_EXECUTE; + op->u.execute.buffers = buffers; +@@ -6215,6 +6521,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm + + d3d12_command_queue_submit_locked(command_queue); + ++done: + vkd3d_mutex_unlock(&command_queue->op_mutex); + return; + } +@@ -6282,6 +6589,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * + + if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { ++ ERR("Failed to add op.\n"); + hr = E_OUTOFMEMORY; + goto done; + } +@@ -6620,6 +6928,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if + + if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { ++ ERR("Failed to add op.\n"); + hr = E_OUTOFMEMORY; + goto done; + } +@@ -6856,22 +7165,31 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * + return d3d12_command_queue_fixup_after_flush_locked(queue); + } + d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); +- d3d12_fence_decref(fence); + break; + + case VKD3D_CS_OP_SIGNAL: + d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); +- d3d12_fence_decref(op->u.signal.fence); + break; + + case VKD3D_CS_OP_EXECUTE: + d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); + break; + ++ case VKD3D_CS_OP_UPDATE_MAPPINGS: ++ FIXME("Tiled resource binding is not supported yet.\n"); ++ update_mappings_cleanup(&op->u.update_mappings); ++ break; ++ ++ case VKD3D_CS_OP_COPY_MAPPINGS: ++ FIXME("Tiled resource mapping copying is not 
supported yet.\n"); ++ break; ++ + default: + vkd3d_unreachable(); + } + ++ d3d12_command_queue_destroy_op(op); ++ + *flushed_any |= true; + } + +@@ -6934,6 +7252,8 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, + if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue->vkd3d_queue, device))) + goto fail_destroy_op_mutex; + ++ queue->supports_sparse_binding = !!(queue->vkd3d_queue->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT); ++ + d3d12_device_add_ref(queue->device = device); + + return S_OK; +@@ -7039,16 +7359,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_signature_Release(ID3D12CommandSign + TRACE("%p decreasing refcount to %u.\n", signature, refcount); + + if (!refcount) +- { +- struct d3d12_device *device = signature->device; +- +- vkd3d_private_store_destroy(&signature->private_store); +- +- vkd3d_free((void *)signature->desc.pArgumentDescs); +- vkd3d_free(signature); +- +- d3d12_device_release(device); +- } ++ d3d12_command_signature_decref(signature); + + return refcount; + } +@@ -7155,6 +7466,7 @@ HRESULT d3d12_command_signature_create(struct d3d12_device *device, const D3D12_ + + object->ID3D12CommandSignature_iface.lpVtbl = &d3d12_command_signature_vtbl; + object->refcount = 1; ++ object->internal_refcount = 1; + + object->desc = *desc; + if (!(object->desc.pArgumentDescs = vkd3d_calloc(desc->NumArgumentDescs, sizeof(*desc->pArgumentDescs)))) diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 39a5ca013c7..4263dcf4184 100644 +index 39a5ca013c7..c33061073a3 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -19,6 +19,8 @@ @@ -28390,7 +40022,16 @@ index 39a5ca013c7..4263dcf4184 100644 struct vkd3d_struct { enum vkd3d_structure_type type; -@@ -2393,9 +2395,23 @@ static void vkd3d_time_domains_init(struct d3d12_device *device) +@@ -1462,6 +1464,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + + vulkan_info->device_limits 
= physical_device_info->properties2.properties.limits; + vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; ++ vulkan_info->sparse_binding = features->sparseBinding; ++ vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; + vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; + vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; + vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat; +@@ -2393,9 +2397,23 @@ static void vkd3d_time_domains_init(struct d3d12_device *device) WARN("Found no acceptable host time domain. Calibrated timestamps will not be available.\n"); } @@ -28416,7 +40057,7 @@ index 39a5ca013c7..4263dcf4184 100644 pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); -@@ -2412,8 +2428,27 @@ static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, +@@ -2412,20 +2430,44 @@ static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER; pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); @@ -28425,34 +40066,73 @@ index 39a5ca013c7..4263dcf4184 100644 +static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) +{ -+ cache->head = NULL; ++ memset(cache, 0, sizeof(*cache)); + cache->size = size; +} + +static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache) +{ + union d3d12_desc_object u; ++ unsigned int i; + void *next; + -+ for (u.object = cache->head; u.object; u.object = next) ++ for (i = 0; i < ARRAY_SIZE(cache->heads); ++i) + { -+ next = u.header->next; -+ vkd3d_free(u.object); ++ for (u.object = cache->heads[i].head; 
u.object; u.object = next) ++ { ++ next = u.header->next; ++ vkd3d_free(u.object); ++ } + } +} + /* ID3D12Device */ - static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) +-static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) ++static inline struct d3d12_device *impl_from_ID3D12Device1(ID3D12Device1 *iface) { -@@ -2454,7 +2489,6 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) +- return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device_iface); ++ return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device1_iface); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device1 *iface, + REFIID riid, void **object) { - struct d3d12_device *device = impl_from_ID3D12Device(iface); + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + +- if (IsEqualGUID(riid, &IID_ID3D12Device) ++ if (IsEqualGUID(riid, &IID_ID3D12Device1) ++ || IsEqualGUID(riid, &IID_ID3D12Device) + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { +@@ -2440,9 +2482,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface + return E_NOINTERFACE; + } + +-static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface) ++static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device1 *iface) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + ULONG refcount = InterlockedIncrement(&device->refcount); + + TRACE("%p increasing refcount to %u.\n", device, refcount); +@@ -2450,11 +2492,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface) + return refcount; + } + +-static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) ++static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device1 *iface) + { 
+- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); ULONG refcount = InterlockedDecrement(&device->refcount); - size_t i; TRACE("%p decreasing refcount to %u.\n", device, refcount); -@@ -2474,8 +2508,8 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) +@@ -2474,8 +2515,8 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device); d3d12_device_destroy_pipeline_cache(device); d3d12_device_destroy_vkd3d_queues(device); @@ -28463,7 +40143,271 @@ index 39a5ca013c7..4263dcf4184 100644 VK_CALL(vkDestroyDevice(device->vk_device, NULL)); if (device->parent) IUnknown_Release(device->parent); -@@ -3368,132 +3402,6 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, +@@ -2487,10 +2528,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) + return refcount; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device1 *iface, + REFGUID guid, UINT *data_size, void *data) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", + iface, debugstr_guid(guid), data_size, data); +@@ -2498,10 +2539,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface + return vkd3d_get_private_data(&device->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device1 *iface, + REFGUID guid, UINT data_size, const void *data) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); 
+ + TRACE("iface %p, guid %s, data_size %u, data %p.\n", + iface, debugstr_guid(guid), data_size, data); +@@ -2509,19 +2550,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device *iface + return vkd3d_set_private_data(&device->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device1 *iface, + REFGUID guid, const IUnknown *data) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&device->private_store, guid, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const WCHAR *name) ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device1 *iface, const WCHAR *name) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size)); + +@@ -2529,17 +2570,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const + VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); + } + +-static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device *iface) ++static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device1 *iface) + { + TRACE("iface %p.\n", iface); + + return 1; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device1 *iface, + const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct 
d3d12_command_queue *object; + HRESULT hr; + +@@ -2553,10 +2594,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device *i + riid, command_queue); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device1 *iface, + D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_command_allocator *object; + HRESULT hr; + +@@ -2570,10 +2611,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic + riid, command_allocator); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device1 *iface, + const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_pipeline_state *object; + HRESULT hr; + +@@ -2587,10 +2628,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 + &IID_ID3D12PipelineState, riid, pipeline_state); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device1 *iface, + const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_pipeline_state *object; + HRESULT hr; + +@@ -2604,11 +2645,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D + &IID_ID3D12PipelineState, 
riid, pipeline_state); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device1 *iface, + UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator, + ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_command_list *object; + HRESULT hr; + +@@ -2621,8 +2662,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *if + initial_pipeline_state, &object))) + return hr; + +- return return_interface(&object->ID3D12GraphicsCommandList2_iface, +- &IID_ID3D12GraphicsCommandList2, riid, command_list); ++ return return_interface(&object->ID3D12GraphicsCommandList3_iface, ++ &IID_ID3D12GraphicsCommandList3, riid, command_list); + } + + /* Direct3D feature levels restrict which formats can be optionally supported. 
*/ +@@ -2731,10 +2772,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) + return true; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device1 *iface, + D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", + iface, feature, feature_data, feature_data_size); +@@ -3233,10 +3274,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device * + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device1 *iface, + const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_descriptor_heap *object; + HRESULT hr; + +@@ -3250,7 +3291,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device + &IID_ID3D12DescriptorHeap, riid, descriptor_heap); + } + +-static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device *iface, ++static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device1 *iface, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) + { + TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); +@@ -3273,11 +3314,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device1 *iface, + UINT 
node_mask, const void *bytecode, SIZE_T bytecode_length, + REFIID riid, void **root_signature) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_root_signature *object; + HRESULT hr; + +@@ -3293,10 +3334,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device * + &IID_ID3D12RootSignature, riid, root_signature); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device1 *iface, + const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); +@@ -3305,11 +3346,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device1 *iface, + ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", +@@ -3319,11 +3360,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device *iface, ++static void STDMETHODCALLTYPE 
d3d12_device_CreateUnorderedAccessView(ID3D12Device1 *iface, + ID3D12Resource *resource, ID3D12Resource *counter_resource, + const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %#lx.\n", +@@ -3334,7 +3375,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device1 *iface, + ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +@@ -3342,10 +3383,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device * + iface, resource, desc, descriptor.ptr); + + d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), +- impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); ++ impl_from_ID3D12Device1(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device1 *iface, + ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +@@ -3353,13 +3394,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device * + iface, resource, desc, descriptor.ptr); + + d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), +- impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); ++ impl_from_ID3D12Device1(iface), 
unsafe_impl_from_ID3D12Resource(resource), desc); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device1 *iface, + const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); +@@ -3368,142 +3409,17 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } @@ -28593,10 +40537,23 @@ index 39a5ca013c7..4263dcf4184 100644 - -#define VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT 8 - - static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, +-static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device1 *iface, UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, const UINT *dst_descriptor_range_sizes, -@@ -3525,15 +3433,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, + UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, + const UINT *src_descriptor_range_sizes, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; + unsigned int dst_range_size, src_range_size; ++ struct d3d12_descriptor_heap *dst_heap; + const struct d3d12_desc *src; + struct d3d12_desc *dst; + +@@ -3525,15 +3441,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, if 
(!dst_descriptor_range_count) return; @@ -28612,8 +40569,11 @@ index 39a5ca013c7..4263dcf4184 100644 dst_range_idx = dst_idx = 0; src_range_idx = src_idx = 0; while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) -@@ -3544,8 +3443,12 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, +@@ -3542,10 +3449,15 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, + src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1; + dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); ++ dst_heap = d3d12_desc_get_descriptor_heap(dst); src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); - while (dst_idx < dst_range_size && src_idx < src_range_size) @@ -28622,12 +40582,21 @@ index 39a5ca013c7..4263dcf4184 100644 + { + if (dst[dst_idx].s.u.object == src[src_idx].s.u.object) + continue; -+ d3d12_desc_copy(&dst[dst_idx], &src[src_idx], device); ++ d3d12_desc_copy(&dst[dst_idx], &src[src_idx], dst_heap, device); + } if (dst_idx >= dst_range_size) { -@@ -3570,17 +3473,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i +@@ -3560,7 +3472,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, + } + } + +-static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device1 *iface, + UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, + const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) +@@ -3570,26 +3482,15 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, descriptor_heap_type); @@ -28645,15 +40614,322 @@ index 
39a5ca013c7..4263dcf4184 100644 d3d12_device_CopyDescriptors(iface, 1, &dst_descriptor_range_offset, &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count, descriptor_heap_type); } -@@ -4080,7 +3972,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + + static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( +- ID3D12Device *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, ++ ID3D12Device1 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, + UINT count, const D3D12_RESOURCE_DESC *resource_descs) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + const D3D12_RESOURCE_DESC *desc; + uint64_t requested_alignment; + +@@ -3662,10 +3563,10 @@ invalid: + return info; + } + +-static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device *iface, ++static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device1 *iface, + D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + bool coherent; + + TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", +@@ -3705,12 +3606,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope + return heap_properties; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device1 *iface, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) + { +- struct d3d12_device *device = 
impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_resource *object; + HRESULT hr; + +@@ -3729,10 +3630,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi + return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device1 *iface, + const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_heap *object; + HRESULT hr; + +@@ -3748,12 +3649,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, + return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device1 *iface, + ID3D12Heap *heap, UINT64 heap_offset, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_heap *heap_object; + struct d3d12_resource *object; + HRESULT hr; +@@ -3772,11 +3673,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device + return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device1 *iface, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const 
D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_resource *object; + HRESULT hr; + +@@ -3790,11 +3691,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic + return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device1 *iface, + ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, + const WCHAR *name, HANDLE *handle) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", + iface, object, attributes, access, debugstr_w(name, device->wchar_size), handle); +@@ -3802,7 +3703,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *i + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device1 *iface, + HANDLE handle, REFIID riid, void **object) + { + FIXME("iface %p, handle %p, riid %s, object %p stub!\n", +@@ -3811,10 +3712,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device *ifa + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device1 *iface, + const WCHAR *name, DWORD access, HANDLE *handle) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + FIXME("iface %p, name 
%s, access %#x, handle %p stub!\n", + iface, debugstr_w(name, device->wchar_size), access, handle); +@@ -3822,7 +3723,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device1 *iface, + UINT object_count, ID3D12Pageable * const *objects) + { + FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", +@@ -3831,7 +3732,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, + return S_OK; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device1 *iface, + UINT object_count, ID3D12Pageable * const *objects) + { + FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", +@@ -3840,10 +3741,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device *iface, + return S_OK; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device1 *iface, + UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_fence *object; + HRESULT hr; + +@@ -3853,24 +3754,24 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, + if (FAILED(hr = d3d12_fence_create(device, initial_value, flags, &object))) + return hr; + +- return return_interface(&object->ID3D12Fence_iface, &IID_ID3D12Fence, riid, fence); ++ return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device *iface) ++static HRESULT STDMETHODCALLTYPE 
d3d12_device_GetDeviceRemovedReason(ID3D12Device1 *iface) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p.\n", iface); + + return device->removed_reason; + } + +-static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device1 *iface, + const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, + UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, + UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; + unsigned int width, height, depth, plane_count, sub_resources_per_plane; +@@ -3950,10 +3851,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *i + *total_bytes = total; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device1 *iface, + const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_query_heap *object; + HRESULT hr; + +@@ -3966,18 +3867,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *ifac + return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device *iface, BOOL enable) ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device1 *iface, BOOL enable) + { + FIXME("iface %p, enable %#x stub!\n", iface, enable); + + return E_NOTIMPL; + } + 
+-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device1 *iface, + const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, + REFIID iid, void **command_signature) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + struct d3d12_command_signature *object; + HRESULT hr; + +@@ -3991,23 +3892,29 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic + &IID_ID3D12CommandSignature, iid, command_signature); + } + +-static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device *iface, ++static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device1 *iface, + ID3D12Resource *resource, UINT *total_tile_count, + D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, + UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, + D3D12_SUBRESOURCE_TILING *sub_resource_tilings) + { +- FIXME("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " ++ const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ ++ TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " + "standard_title_shape %p, sub_resource_tiling_count %p, " +- "first_sub_resource_tiling %u, sub_resource_tilings %p stub!\n", ++ "first_sub_resource_tiling %u, sub_resource_tilings %p.\n", + iface, resource, total_tile_count, packed_mip_info, standard_tile_shape, + sub_resource_tiling_count, first_sub_resource_tiling, + sub_resource_tilings); ++ ++ d3d12_resource_get_tiling(device, resource_impl, total_tile_count, packed_mip_info, standard_tile_shape, ++ sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); + } + +-static LUID * STDMETHODCALLTYPE 
d3d12_device_GetAdapterLuid(ID3D12Device *iface, LUID *luid) ++static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device1 *iface, LUID *luid) + { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device1(iface); + + TRACE("iface %p, luid %p.\n", iface, luid); + +@@ -4016,7 +3923,33 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, + return luid; + } + +-static const struct ID3D12DeviceVtbl d3d12_device_vtbl = ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device1 *iface, ++ const void *blob, SIZE_T blob_size, REFIID iid, void **lib) ++{ ++ FIXME("iface %p, blob %p, blob_size %lu, iid %s, lib %p stub!\n", iface, blob, blob_size, debugstr_guid(iid), lib); ++ ++ return DXGI_ERROR_UNSUPPORTED; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device1 *iface, ++ ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, ++ D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) ++{ ++ FIXME("iface %p, fences %p, values %p, fence_count %u, flags %#x, event %p stub!\n", ++ iface, fences, values, fence_count, flags, event); ++ ++ return E_NOTIMPL; ++} ++ ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device1 *iface, ++ UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) ++{ ++ FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); ++ ++ return S_OK; ++} ++ ++static const struct ID3D12Device1Vtbl d3d12_device_vtbl = + { + /* IUnknown methods */ + d3d12_device_QueryInterface, +@@ -4065,14 +3998,18 @@ static const struct ID3D12DeviceVtbl d3d12_device_vtbl = + d3d12_device_CreateCommandSignature, + d3d12_device_GetResourceTiling, + d3d12_device_GetAdapterLuid, ++ /* ID3D12Device1 methods */ ++ d3d12_device_CreatePipelineLibrary, ++ 
d3d12_device_SetEventOnMultipleFenceCompletion, ++ d3d12_device_SetResidencyPriority, + }; + +-struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface) ++struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface) + { + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_device_vtbl); +- return impl_from_ID3D12Device(iface); ++ return impl_from_ID3D12Device1(iface); + } + + static HRESULT d3d12_device_init(struct d3d12_device *device, +@@ -4080,9 +4017,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, { const struct vkd3d_vk_device_procs *vk_procs; HRESULT hr; - size_t i; - device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; +- device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; ++ device->ID3D12Device1_iface.lpVtbl = &d3d12_device_vtbl; device->refcount = 1; -@@ -4123,10 +4014,10 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + + vkd3d_instance_incref(device->vkd3d_instance = instance); +@@ -4123,10 +4059,10 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, device->blocked_queue_count = 0; vkd3d_mutex_init(&device->blocked_queues_mutex); @@ -28667,8 +40943,41 @@ index 39a5ca013c7..4263dcf4184 100644 if ((device->parent = create_info->parent)) IUnknown_AddRef(device->parent); +@@ -4279,28 +4215,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha + + IUnknown *vkd3d_get_device_parent(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); + + return d3d12_device->parent; + } + + VkDevice vkd3d_get_vk_device(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); + + return d3d12_device->vk_device; + } + + VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) + { +- 
struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); + + return d3d12_device->vk_physical_device; + } + + struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); + + return d3d12_device->vkd3d_instance; + } diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index 8c050cfeb32..ea7b6859cc1 100644 +index 8c050cfeb32..f3842958d96 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -326,6 +326,9 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap) @@ -28845,18 +41154,281 @@ index 8c050cfeb32..ea7b6859cc1 100644 return S_OK; } -@@ -1027,8 +968,8 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12 +@@ -838,6 +779,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, + VkImageFormatListCreateInfoKHR format_list; + const struct vkd3d_format *format; + VkImageCreateInfo image_info; ++ uint32_t count; + VkResult vr; + + if (resource) +@@ -973,6 +915,20 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, + if (resource && image_info.tiling == VK_IMAGE_TILING_LINEAR) + resource->flags |= VKD3D_RESOURCE_LINEAR_TILING; + ++ if (sparse_resource) ++ { ++ count = 0; ++ VK_CALL(vkGetPhysicalDeviceSparseImageFormatProperties(device->vk_physical_device, image_info.format, ++ image_info.imageType, image_info.samples, image_info.usage, image_info.tiling, &count, NULL)); ++ ++ if (!count) ++ { ++ FIXME("Sparse images are not supported with format %u, type %u, samples %u, usage %#x.\n", ++ image_info.format, image_info.imageType, image_info.samples, image_info.usage); ++ return E_INVALIDARG; ++ } ++ } ++ + if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, 
vk_image))) < 0) + WARN("Failed to create Vulkan image, vr %d.\n", vr); + +@@ -987,6 +943,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + D3D12_RESOURCE_DESC validated_desc; + VkMemoryRequirements requirements; + VkImage vk_image; ++ bool tiled; + HRESULT hr; + + assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); +@@ -999,8 +956,10 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + desc = &validated_desc; + } + ++ tiled = desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; ++ + /* XXX: We have to create an image to get its memory requirements. */ +- if (SUCCEEDED(hr = vkd3d_create_image(device, &heap_properties, 0, desc, NULL, &vk_image))) ++ if (SUCCEEDED(hr = vkd3d_create_image(device, tiled ? NULL : &heap_properties, 0, desc, NULL, &vk_image))) + { + VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &requirements)); + VK_CALL(vkDestroyImage(device->vk_device, vk_image, NULL)); +@@ -1012,6 +971,11 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + return hr; + } + ++static void d3d12_resource_tile_info_cleanup(struct d3d12_resource *resource) ++{ ++ vkd3d_free(resource->tiles.subresources); ++} ++ + static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12_device *device) + { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; +@@ -1027,8 +991,10 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12 else VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL)); - if (resource->flags & VKD3D_RESOURCE_DEDICATED_HEAP) - d3d12_heap_destroy(resource->heap); ++ d3d12_resource_tile_info_cleanup(resource); ++ + if (resource->heap) + d3d12_heap_resource_destroyed(resource->heap); } static ULONG d3d12_resource_incref(struct d3d12_resource *resource) -@@ -1223,12 +1164,55 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_GetDevice(ID3D12Resource *iface, +@@ -1098,12 +1064,196 @@ static 
void d3d12_resource_get_level_box(const struct d3d12_resource *resource, + box->back = d3d12_resource_desc_get_depth(&resource->desc, level); + } + +-/* ID3D12Resource */ +-static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) ++static void compute_image_subresource_size_in_tiles(const VkExtent3D *tile_extent, ++ const struct D3D12_RESOURCE_DESC *desc, unsigned int miplevel_idx, ++ struct vkd3d_tiled_region_extent *size) + { +- return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); ++ unsigned int width, height, depth; ++ ++ width = d3d12_resource_desc_get_width(desc, miplevel_idx); ++ height = d3d12_resource_desc_get_height(desc, miplevel_idx); ++ depth = d3d12_resource_desc_get_depth(desc, miplevel_idx); ++ size->width = (width + tile_extent->width - 1) / tile_extent->width; ++ size->height = (height + tile_extent->height - 1) / tile_extent->height; ++ size->depth = (depth + tile_extent->depth - 1) / tile_extent->depth; + } + ++void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, ++ UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, ++ UINT *subresource_tiling_count, UINT first_subresource_tiling, ++ D3D12_SUBRESOURCE_TILING *subresource_tilings) ++{ ++ unsigned int i, subresource, subresource_count, miplevel_idx, count; ++ const struct vkd3d_subresource_tile_info *tile_info; ++ const VkExtent3D *tile_extent; ++ ++ tile_extent = &resource->tiles.tile_extent; ++ ++ if (packed_mip_info) ++ { ++ packed_mip_info->NumStandardMips = resource->tiles.standard_mip_count; ++ packed_mip_info->NumPackedMips = resource->desc.MipLevels - packed_mip_info->NumStandardMips; ++ packed_mip_info->NumTilesForPackedMips = !!resource->tiles.packed_mip_tile_count; /* non-zero dummy value */ ++ packed_mip_info->StartTileIndexInOverallResource = packed_mip_info->NumPackedMips ++ ? 
resource->tiles.subresources[resource->tiles.standard_mip_count].offset : 0; ++ } ++ ++ if (standard_tile_shape) ++ { ++ /* D3D12 docs say tile shape is cleared to zero if there is no standard mip, but drivers don't to do this. */ ++ standard_tile_shape->WidthInTexels = tile_extent->width; ++ standard_tile_shape->HeightInTexels = tile_extent->height; ++ standard_tile_shape->DepthInTexels = tile_extent->depth; ++ } ++ ++ if (total_tile_count) ++ *total_tile_count = resource->tiles.total_count; ++ ++ if (!subresource_tiling_count) ++ return; ++ ++ subresource_count = resource->tiles.subresource_count; ++ ++ count = subresource_count - min(first_subresource_tiling, subresource_count); ++ count = min(count, *subresource_tiling_count); ++ ++ for (i = 0; i < count; ++i) ++ { ++ subresource = i + first_subresource_tiling; ++ miplevel_idx = subresource % resource->desc.MipLevels; ++ if (miplevel_idx >= resource->tiles.standard_mip_count) ++ { ++ memset(&subresource_tilings[i], 0, sizeof(subresource_tilings[i])); ++ subresource_tilings[i].StartTileIndexInOverallResource = D3D12_PACKED_TILE; ++ continue; ++ } ++ ++ tile_info = &resource->tiles.subresources[subresource]; ++ subresource_tilings[i].StartTileIndexInOverallResource = tile_info->offset; ++ subresource_tilings[i].WidthInTiles = tile_info->extent.width; ++ subresource_tilings[i].HeightInTiles = tile_info->extent.height; ++ subresource_tilings[i].DepthInTiles = tile_info->extent.depth; ++ } ++ *subresource_tiling_count = i; ++} ++ ++static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3d12_device *device) ++{ ++ unsigned int i, start_idx, subresource_count, tile_count, miplevel_idx; ++ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; ++ VkSparseImageMemoryRequirements *sparse_requirements_array; ++ VkSparseImageMemoryRequirements sparse_requirements = {0}; ++ struct vkd3d_subresource_tile_info *tile_info; ++ VkMemoryRequirements requirements; ++ const VkExtent3D 
*tile_extent; ++ uint32_t requirement_count; ++ ++ subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); ++ ++ if (!(resource->tiles.subresources = vkd3d_calloc(subresource_count, sizeof(*resource->tiles.subresources)))) ++ { ++ ERR("Failed to allocate subresource info array.\n"); ++ return false; ++ } ++ ++ if (d3d12_resource_is_buffer(resource)) ++ { ++ assert(subresource_count == 1); ++ ++ VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, resource->u.vk_buffer, &requirements)); ++ if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) ++ FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); ++ ++ tile_info = &resource->tiles.subresources[0]; ++ tile_info->offset = 0; ++ tile_info->extent.width = align(resource->desc.Width, D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) ++ / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; ++ tile_info->extent.height = 1; ++ tile_info->extent.depth = 1; ++ tile_info->count = tile_info->extent.width; ++ ++ resource->tiles.tile_extent.width = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; ++ resource->tiles.tile_extent.height = 1; ++ resource->tiles.tile_extent.depth = 1; ++ resource->tiles.total_count = tile_info->extent.width; ++ resource->tiles.subresource_count = 1; ++ resource->tiles.standard_mip_count = 1; ++ resource->tiles.packed_mip_tile_count = 0; ++ } ++ else ++ { ++ VK_CALL(vkGetImageMemoryRequirements(device->vk_device, resource->u.vk_image, &requirements)); ++ if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) ++ FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); ++ ++ requirement_count = 0; ++ VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, &requirement_count, NULL)); ++ if (!(sparse_requirements_array = vkd3d_calloc(requirement_count, sizeof(*sparse_requirements_array)))) ++ { ++ ERR("Failed to allocate sparse requirements array.\n"); ++ return false; ++ } ++ 
VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, ++ &requirement_count, sparse_requirements_array)); ++ ++ for (i = 0; i < requirement_count; ++i) ++ { ++ if (sparse_requirements_array[i].formatProperties.aspectMask & resource->format->vk_aspect_mask) ++ { ++ if (sparse_requirements.formatProperties.aspectMask) ++ { ++ WARN("Ignoring properties for aspect mask %#x.\n", ++ sparse_requirements_array[i].formatProperties.aspectMask); ++ } ++ else ++ { ++ sparse_requirements = sparse_requirements_array[i]; ++ } ++ } ++ } ++ vkd3d_free(sparse_requirements_array); ++ if (!sparse_requirements.formatProperties.aspectMask) ++ { ++ WARN("Failed to get sparse requirements.\n"); ++ return false; ++ } ++ ++ resource->tiles.tile_extent = sparse_requirements.formatProperties.imageGranularity; ++ resource->tiles.subresource_count = subresource_count; ++ resource->tiles.standard_mip_count = sparse_requirements.imageMipTailSize ++ ? sparse_requirements.imageMipTailFirstLod : resource->desc.MipLevels; ++ resource->tiles.packed_mip_tile_count = (resource->tiles.standard_mip_count < resource->desc.MipLevels) ++ ? 
sparse_requirements.imageMipTailSize / requirements.alignment : 0; ++ ++ for (i = 0, start_idx = 0; i < subresource_count; ++i) ++ { ++ miplevel_idx = i % resource->desc.MipLevels; ++ ++ tile_extent = &sparse_requirements.formatProperties.imageGranularity; ++ tile_info = &resource->tiles.subresources[i]; ++ compute_image_subresource_size_in_tiles(tile_extent, &resource->desc, miplevel_idx, &tile_info->extent); ++ tile_info->offset = start_idx; ++ tile_info->count = 0; ++ ++ if (miplevel_idx < resource->tiles.standard_mip_count) ++ { ++ tile_count = tile_info->extent.width * tile_info->extent.height * tile_info->extent.depth; ++ start_idx += tile_count; ++ tile_info->count = tile_count; ++ } ++ else if (miplevel_idx == resource->tiles.standard_mip_count) ++ { ++ tile_info->count = 1; /* Non-zero dummy value */ ++ start_idx += 1; ++ } ++ } ++ resource->tiles.total_count = start_idx; ++ } ++ ++ return true; ++} ++ ++/* ID3D12Resource */ + static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource *iface, + REFIID riid, void **object) + { +@@ -1223,12 +1373,55 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_GetDevice(ID3D12Resource *iface, return d3d12_device_query_interface(resource->device, iid, device); } @@ -28913,7 +41485,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 TRACE("iface %p, sub_resource %u, read_range %p, data %p.\n", iface, sub_resource, read_range, data); -@@ -1259,15 +1243,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT +@@ -1259,15 +1452,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT return E_NOTIMPL; } @@ -28938,7 +41510,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 } static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT sub_resource, -@@ -1286,9 +1273,10 @@ static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT s +@@ -1286,9 +1482,10 @@ static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, 
UINT s return; } @@ -28952,7 +41524,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 } static D3D12_RESOURCE_DESC * STDMETHODCALLTYPE d3d12_resource_GetDesc(ID3D12Resource *iface, -@@ -1320,10 +1308,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc +@@ -1320,10 +1517,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc VkImageSubresource vk_sub_resource; const struct vkd3d_format *format; VkSubresourceLayout vk_layout; @@ -28964,7 +41536,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 TRACE("iface %p, src_data %p, src_row_pitch %u, src_slice_pitch %u, " "dst_sub_resource %u, dst_box %s.\n", -@@ -1381,20 +1369,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc +@@ -1381,20 +1578,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch); @@ -28991,7 +41563,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 return S_OK; } -@@ -1408,10 +1393,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour +@@ -1408,10 +1602,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour VkImageSubresource vk_sub_resource; const struct vkd3d_format *format; VkSubresourceLayout vk_layout; @@ -29003,7 +41575,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 TRACE("iface %p, dst_data %p, dst_row_pitch %u, dst_slice_pitch %u, " "src_sub_resource %u, src_box %s.\n", -@@ -1469,21 +1454,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour +@@ -1469,21 +1663,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch); @@ -29019,10 
+41591,10 @@ index 8c050cfeb32..ea7b6859cc1 100644 vk_layout.depthPitch, src_box->left, src_box->top, src_box->front); + src_size = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + vk_layout.depthPitch, src_box->right, src_box->bottom - 1, src_box->back - 1) - src_offset; ++ ++ d3d12_resource_invalidate(resource, src_offset, src_size); - vkd3d_format_copy_data(format, src_data, vk_layout.rowPitch, vk_layout.depthPitch, -+ d3d12_resource_invalidate(resource, src_offset, src_size); -+ + vkd3d_format_copy_data(format, src_data + src_offset, vk_layout.rowPitch, vk_layout.depthPitch, dst_data, dst_row_pitch, dst_slice_pitch, src_box->right - src_box->left, src_box->bottom - src_box->top, src_box->back - src_box->front); @@ -29032,7 +41604,51 @@ index 8c050cfeb32..ea7b6859cc1 100644 return S_OK; } -@@ -1941,6 +1923,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, +@@ -1679,6 +1870,21 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d + return E_INVALIDARG; + } + ++ if (desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE) ++ { ++ if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D && !device->vk_info.sparse_residency_3d) ++ { ++ WARN("The device does not support tiled 3D images.\n"); ++ return E_INVALIDARG; ++ } ++ if (format->plane_count > 1) ++ { ++ WARN("Invalid format %#x. 
D3D12 does not support multiplanar formats for tiled resources.\n", ++ format->dxgi_format); ++ return E_INVALIDARG; ++ } ++ } ++ + if (!d3d12_resource_validate_texture_format(desc, format) + || !d3d12_resource_validate_texture_alignment(desc, format)) + return E_INVALIDARG; +@@ -1740,6 +1946,12 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 + + resource->desc = *desc; + ++ if (!heap_properties && !device->vk_info.sparse_binding) ++ { ++ WARN("The device does not support tiled images.\n"); ++ return E_INVALIDARG; ++ } ++ + if (heap_properties && !d3d12_resource_validate_heap_properties(resource, heap_properties, initial_state)) + return E_INVALIDARG; + +@@ -1805,6 +2017,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 + resource->heap = NULL; + resource->heap_offset = 0; + ++ memset(&resource->tiles, 0, sizeof(resource->tiles)); ++ + if (FAILED(hr = vkd3d_private_store_init(&resource->private_store))) + { + d3d12_resource_destroy(resource, device); +@@ -1941,6 +2155,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, { resource->heap = heap; resource->heap_offset = heap_offset; @@ -29040,51 +41656,104 @@ index 8c050cfeb32..ea7b6859cc1 100644 } else { -@@ -2061,24 +2044,72 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource) +@@ -1989,6 +2204,12 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, + desc, initial_state, optimized_clear_value, &object))) + return hr; + ++ if (!d3d12_resource_init_tiles(object, device)) ++ { ++ d3d12_resource_Release(&object->ID3D12Resource_iface); ++ return E_OUTOFMEMORY; ++ } ++ + TRACE("Created reserved resource %p.\n", object); + + *resource = object; +@@ -1999,7 +2220,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, + HRESULT vkd3d_create_image_resource(ID3D12Device *device, + const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) + { +- struct d3d12_device 
*d3d12_device = unsafe_impl_from_ID3D12Device(device); ++ struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device1((ID3D12Device1 *)device); + struct d3d12_resource *object; + HRESULT hr; + +@@ -2061,24 +2282,101 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource) return d3d12_resource_decref(impl_from_ID3D12Resource(resource)); } -/* CBVs, SRVs, UAVs */ -static struct vkd3d_view *vkd3d_view_create(enum vkd3d_view_type type) -+/* Objects are cached so that vkd3d_view_incref() can safely check the refcount -+ * of an object freed by another thread. */ ++#define HEAD_INDEX_MASK (ARRAY_SIZE(cache->heads) - 1) ++ ++/* Objects are cached so that vkd3d_view_incref() can safely check the refcount of an ++ * object freed by another thread. This could be implemented as a single atomic linked ++ * list, but it requires handling the ABA problem, which brings issues with cross-platform ++ * support, compiler support, and non-universal x86-64 support for 128-bit CAS. */ +static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) { - struct vkd3d_view *view; + union d3d12_desc_object u; -+ void *next; ++ unsigned int i; ++ ++ STATIC_ASSERT(!(ARRAY_SIZE(cache->heads) & HEAD_INDEX_MASK)); - if ((view = vkd3d_malloc(sizeof(*view)))) -+ do ++ i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK; ++ for (;;) { - view->refcount = 1; - view->type = type; - view->serial_id = InterlockedIncrement64(&object_global_serial_id); - view->vk_counter_view = VK_NULL_HANDLE; -+ u.object = cache->head; -+ if (!u.object) ++ if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) ++ { ++ if ((u.object = cache->heads[i].head)) ++ { ++ vkd3d_atomic_decrement(&cache->free_count); ++ cache->heads[i].head = u.header->next; ++ vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); ++ return u.object; ++ } ++ vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); ++ } ++ /* Keeping a free count avoids uncertainty over when this loop should 
terminate, ++ * which could result in excess allocations gradually increasing without limit. */ ++ if (cache->free_count < ARRAY_SIZE(cache->heads)) + return vkd3d_malloc(cache->size); -+ next = u.header->next; ++ ++ i = (i + 1) & HEAD_INDEX_MASK; } - return view; -+ while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, u.object, next)); -+ -+ return u.object; -+} -+ + } + +-void vkd3d_view_incref(struct vkd3d_view *view) +static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object) -+{ + { +- InterlockedIncrement(&view->refcount); + union d3d12_desc_object u = {object}; ++ unsigned int i; + void *head; + -+ do ++ /* Using the same index as above may result in a somewhat uneven distribution, ++ * but the main objective is to avoid costly spinlock contention. */ ++ i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK; ++ for (;;) + { -+ head = cache->head; -+ u.header->next = head; ++ if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) ++ break; ++ i = (i + 1) & HEAD_INDEX_MASK; + } -+ while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, head, u.object)); ++ ++ head = cache->heads[i].head; ++ u.header->next = head; ++ cache->heads[i].head = u.object; ++ vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); ++ vkd3d_atomic_increment(&cache->free_count); +} + ++#undef HEAD_INDEX_MASK ++ +static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device) +{ + struct vkd3d_cbuffer_desc *desc; @@ -29097,13 +41766,11 @@ index 8c050cfeb32..ea7b6859cc1 100644 + desc->h.refcount = 1; + + return desc; - } - --void vkd3d_view_incref(struct vkd3d_view *view) ++} ++ +static struct vkd3d_view *vkd3d_view_create(uint32_t magic, VkDescriptorType vk_descriptor_type, + enum vkd3d_view_type type, struct d3d12_device *device) - { -- InterlockedIncrement(&view->refcount); ++{ + struct vkd3d_view *view; + + assert(magic); @@ -29124,7 +41791,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 } static 
void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *device) -@@ -2087,313 +2118,299 @@ static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *dev +@@ -2087,314 +2385,306 @@ static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *dev TRACE("Destroying view %p.\n", view); @@ -29398,16 +42065,25 @@ index 8c050cfeb32..ea7b6859cc1 100644 break; } if (is_null && device->vk_info.EXT_robustness2) -+ return d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, dst_array_element, writes, device); -+ -+ ++i; -+ if (u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && u.view->v.vk_counter_view) - { +- { - d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, - descriptor_set->vk_descriptor_writes[0].dstArrayElement, device); - vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); - return; -+ descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; +- } +- +- VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); ++ return d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, dst_array_element, writes, device); + +- if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->s.u.view_info.view->vk_counter_view) ++ ++i; ++ if (u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && u.view->v.vk_counter_view) + { + descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; +- descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; +- descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; +- descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->vk_counter_view; +- VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); + writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes->vk_descriptor_writes[i].pNext = NULL; + writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; @@ -29420,50 
+42096,58 @@ index 8c050cfeb32..ea7b6859cc1 100644 + writes->vk_descriptor_writes[i++].pTexelBufferView = &u.view->v.vk_counter_view; } -- VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); -- -- if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->s.u.view_info.view->vk_counter_view) -+ if (i >= ARRAY_SIZE(writes->vk_descriptor_writes) - 1) - { -- descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; -- descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; -- descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; -- descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->vk_counter_view; -- VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); -+ VK_CALL(vkUpdateDescriptorSets(device->vk_device, i, writes->vk_descriptor_writes, 0, NULL)); -+ descriptor_writes_free_object_refs(writes, device); -+ i = 0; - } - - vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); -+ writes->count = i; - } - +-} +- -static void d3d12_desc_write_atomic_d3d12_only(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) -+void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) - { +-{ - struct vkd3d_view *defunct_view; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, dst); - vkd3d_mutex_lock(mutex); +- +- if (!(dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->s.u.view_info.view->refcount)) ++ if (i >= ARRAY_SIZE(writes->vk_descriptor_writes) - 1) + { +- d3d12_desc_copy_raw(dst, src); +- vkd3d_mutex_unlock(mutex); +- return; ++ VK_CALL(vkUpdateDescriptorSets(device->vk_device, i, writes->vk_descriptor_writes, 0, NULL)); ++ descriptor_writes_free_object_refs(writes, device); ++ i = 0; + } + +- defunct_view = dst->s.u.view_info.view; +- 
d3d12_desc_copy_raw(dst, src); +- vkd3d_mutex_unlock(mutex); +- +- /* Destroy the view after unlocking to reduce wait time. */ +- vkd3d_view_destroy(defunct_view, device); ++ writes->count = i; + } + +-void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, +- struct d3d12_device *device) ++void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) + { +- struct vkd3d_view *defunct_view = NULL; +- struct vkd3d_mutex *mutex; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct d3d12_desc *descriptors, *src; + struct descriptor_writes writes; + union d3d12_desc_object u; + unsigned int i, next; -- if (!(dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->s.u.view_info.view->refcount)) -- { -- d3d12_desc_copy_raw(dst, src); -- vkd3d_mutex_unlock(mutex); +- mutex = d3d12_device_get_descriptor_mutex(device, dst); +- vkd3d_mutex_lock(mutex); + if ((i = vkd3d_atomic_exchange(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) - return; -- } ++ return; -- defunct_view = dst->s.u.view_info.view; -- d3d12_desc_copy_raw(dst, src); -- vkd3d_mutex_unlock(mutex); +- /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ +- if ((dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) +- && !InterlockedDecrement(&dst->s.u.view_info.view->refcount)) +- defunct_view = dst->s.u.view_info.view; + writes.null_vk_cbv_info.buffer = VK_NULL_HANDLE; + writes.null_vk_cbv_info.offset = 0; + writes.null_vk_cbv_info.range = VK_WHOLE_SIZE; @@ -29471,93 +42155,71 @@ index 8c050cfeb32..ea7b6859cc1 100644 + writes.count = 0; + writes.held_ref_count = 0; -- /* Destroy the view after unlocking to reduce wait time. 
*/ -- vkd3d_view_destroy(defunct_view, device); --} -- --void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, -- struct d3d12_device *device) --{ -- struct vkd3d_view *defunct_view = NULL; -- struct vkd3d_mutex *mutex; +- d3d12_desc_copy_raw(dst, src); + descriptors = (struct d3d12_desc *)descriptor_heap->descriptors; -- mutex = d3d12_device_get_descriptor_mutex(device, dst); -- vkd3d_mutex_lock(mutex); +- vkd3d_mutex_unlock(mutex); + for (; i != UINT_MAX; i = next) + { + src = &descriptors[i]; + next = (int)src->next >> 1; -- /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ -- if ((dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) -- && !InterlockedDecrement(&dst->s.u.view_info.view->refcount)) -- defunct_view = dst->s.u.view_info.view; +- /* Destroy the view after unlocking to reduce wait time. */ +- if (defunct_view) +- vkd3d_view_destroy(defunct_view, device); + u.object = d3d12_desc_get_object_ref(src, device); -- d3d12_desc_copy_raw(dst, src); +- if (device->use_vk_heaps && dst->s.magic) +- d3d12_desc_write_vk_heap(dst, src, device); +-} + if (!u.object) + { + vkd3d_atomic_exchange(&src->next, 0); + continue; + } -- vkd3d_mutex_unlock(mutex); +-static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) +-{ +- static const struct d3d12_desc null_desc = {0}; + writes.held_refs[writes.held_ref_count++] = u.object; + d3d12_desc_write_vk_heap(descriptor_heap, i, &writes, u.object, device); - -- /* Destroy the view after unlocking to reduce wait time. */ -- if (defunct_view) -- vkd3d_view_destroy(defunct_view, device); ++ + vkd3d_atomic_exchange(&src->next, 0); + } -- if (device->use_vk_heaps && dst->s.magic) -- d3d12_desc_write_vk_heap(dst, src, device); +- d3d12_desc_write_atomic(descriptor, &null_desc, device); + /* Avoid thunk calls wherever possible. 
*/ + if (writes.count) + VK_CALL(vkUpdateDescriptorSets(device->vk_device, writes.count, writes.vk_descriptor_writes, 0, NULL)); + descriptor_writes_free_object_refs(&writes, device); } --static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) -+static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) +-void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, +- struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, +- struct d3d12_device *device) ++static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst, struct d3d12_descriptor_heap *descriptor_heap) { -- static const struct d3d12_desc null_desc = {0}; -+ struct d3d12_descriptor_heap *descriptor_heap; +- struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; +- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; +- unsigned int i, write_count; + unsigned int i, head; -+ + +- vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); + i = dst->index; -+ descriptor_heap = d3d12_desc_get_descriptor_heap(dst); + head = descriptor_heap->dirty_list_head; -- d3d12_desc_write_atomic(descriptor, &null_desc, device); +- for (i = 0, write_count = 0; i < info->count; ++i) + /* Only one thread can swap the value away from zero. */ + if (!vkd3d_atomic_compare_exchange(&dst->next, 0, (head << 1) | 1)) + return; + /* Now it is safe to modify 'next' to another nonzero value if necessary. 
*/ + while (!vkd3d_atomic_compare_exchange(&descriptor_heap->dirty_list_head, head, i)) -+ { + { +- d3d12_desc_write_atomic_d3d12_only(locations[i].dst, &locations[i].src, device); + head = descriptor_heap->dirty_list_head; + vkd3d_atomic_exchange(&dst->next, (head << 1) | 1); + } - } - --void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, -- struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, -+void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) - { -- struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; -- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -- unsigned int i, write_count; -- -- vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); -- -- for (i = 0, write_count = 0; i < info->count; ++i) -- { -- d3d12_desc_write_atomic_d3d12_only(locations[i].dst, &locations[i].src, device); -+ void *object = src->s.u.object; ++} - if (i && locations[i].dst == locations[i - 1].dst + 1) - { @@ -29577,9 +42239,17 @@ index 8c050cfeb32..ea7b6859cc1 100644 - - if (!info->uav_counter) - goto done; -- ++static inline void descriptor_heap_write_atomic(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_desc *dst, ++ const struct d3d12_desc *src, struct d3d12_device *device) ++{ ++ void *object = src->s.u.object; + - descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; -- ++ d3d12_desc_replace(dst, object, device); ++ if (descriptor_heap->use_vk_heaps && object && !dst->next) ++ d3d12_desc_mark_as_modified(dst, descriptor_heap); ++} + - for (i = 0, write_count = 0; i < info->count; ++i) - { - if (!locations[i].src.s.u.view_info.view->vk_counter_view) @@ -29592,9 +42262,10 @@ index 8c050cfeb32..ea7b6859cc1 100644 - descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; - } - 
VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); -+ d3d12_desc_replace(dst, object, device); -+ if (device->use_vk_heaps && object && !dst->next) -+ d3d12_desc_mark_as_modified(dst); ++void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, ++ struct d3d12_device *device) ++{ ++ descriptor_heap_write_atomic(d3d12_desc_get_descriptor_heap(dst), dst, src, device); +} -done: @@ -29604,7 +42275,10 @@ index 8c050cfeb32..ea7b6859cc1 100644 + d3d12_desc_replace(descriptor, NULL, device); } - void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, +-void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, ++/* This is a major performance bottleneck for some games, so do not load the device ++ * pointer from dst_heap. In some cases device will not be used. */ ++void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, struct d3d12_device *device) { struct d3d12_desc tmp; @@ -29624,11 +42298,13 @@ index 8c050cfeb32..ea7b6859cc1 100644 - - vkd3d_mutex_unlock(mutex); - +- d3d12_desc_write_atomic(dst, &tmp, device); + tmp.s.u.object = d3d12_desc_get_object_ref(src, device); - d3d12_desc_write_atomic(dst, &tmp, device); ++ descriptor_heap_write_atomic(dst_heap, dst, &tmp, device); } -@@ -2455,8 +2472,9 @@ static bool vkd3d_create_vk_buffer_view(struct d3d12_device *device, + static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12_device *device, +@@ -2455,8 +2745,9 @@ static bool vkd3d_create_vk_buffer_view(struct d3d12_device *device, return vr == VK_SUCCESS; } @@ -29640,7 +42316,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkBufferView vk_view = VK_NULL_HANDLE; -@@ -2465,16 +2483,18 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c +@@ -2465,16 +2756,18 @@ bool 
vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c if (vk_buffer && !vkd3d_create_vk_buffer_view(device, vk_buffer, format, offset, size, &vk_view)) return false; @@ -29664,7 +42340,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 *view = object; return true; } -@@ -2482,7 +2502,7 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c +@@ -2482,7 +2775,7 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c #define VKD3D_VIEW_RAW_BUFFER 0x1 static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, @@ -29673,7 +42349,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 unsigned int offset, unsigned int size, unsigned int structure_stride, unsigned int flags, struct vkd3d_view **view) { -@@ -2513,7 +2533,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, +@@ -2513,7 +2806,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, assert(d3d12_resource_is_buffer(resource)); @@ -29682,7 +42358,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 format, offset * element_size, size * element_size, view); } -@@ -2741,7 +2761,7 @@ static void vkd3d_texture_view_desc_normalise(struct vkd3d_texture_view_desc *de +@@ -2741,7 +3034,7 @@ static void vkd3d_texture_view_desc_normalise(struct vkd3d_texture_view_desc *de desc->layer_count = max_layer_count; } @@ -29691,7 +42367,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -@@ -2774,18 +2794,19 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, +@@ -2774,18 +3067,19 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, } } @@ -29718,7 +42394,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 *view = object; return true; } -@@ -2794,6 +2815,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, +@@ -2794,6 
+3088,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc) { struct VkDescriptorBufferInfo *buffer_info; @@ -29726,7 +42402,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 struct d3d12_resource *resource; if (!desc) -@@ -2802,13 +2824,19 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, +@@ -2802,13 +3097,19 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, return; } @@ -29747,7 +42423,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 if (desc->BufferLocation) { resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, desc->BufferLocation); -@@ -2824,8 +2852,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, +@@ -2824,8 +3125,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, buffer_info->range = VK_WHOLE_SIZE; } @@ -29757,7 +42433,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 } static unsigned int vkd3d_view_flags_from_d3d12_buffer_srv_flags(D3D12_BUFFER_SRV_FLAGS flags) -@@ -2842,7 +2869,6 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, +@@ -2842,7 +3142,6 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, { struct vkd3d_null_resources *null_resources = &device->null_resources; struct vkd3d_texture_view_desc vkd3d_desc; @@ -29765,7 +42441,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 VkImage vk_image; if (!desc) -@@ -2857,15 +2883,9 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, +@@ -2857,15 +3156,9 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, if (!device->vk_info.EXT_robustness2) WARN("Creating NULL buffer SRV %#x.\n", desc->Format); @@ -29783,7 +42459,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 return; case D3D12_SRV_DIMENSION_TEXTURE2D: -@@ -2904,20 +2924,13 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, +@@ -2904,20 +3197,13 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, vkd3d_desc.components.a = 
VK_COMPONENT_SWIZZLE_ZERO; vkd3d_desc.allowed_swizzle = true; @@ -29805,7 +42481,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 unsigned int flags; if (!desc) -@@ -2933,15 +2946,9 @@ static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, +@@ -2933,15 +3219,9 @@ static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, } flags = vkd3d_view_flags_from_d3d12_buffer_srv_flags(desc->u.Buffer.Flags); @@ -29823,7 +42499,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 } static VkImageAspectFlags vk_image_aspect_flags_from_d3d12_plane_slice(const struct vkd3d_format *format, -@@ -2970,7 +2977,6 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, +@@ -2970,7 +3250,6 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) { struct vkd3d_texture_view_desc vkd3d_desc; @@ -29831,7 +42507,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 if (!resource) { -@@ -3002,6 +3008,11 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, +@@ -3002,6 +3281,11 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, switch (desc->ViewDimension) { @@ -29843,7 +42519,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 case D3D12_SRV_DIMENSION_TEXTURE2D: vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; vkd3d_desc.miplevel_idx = desc->u.Texture2D.MostDetailedMip; -@@ -3066,13 +3077,8 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, +@@ -3066,13 +3350,8 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, } } @@ -29859,7 +42535,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 } static unsigned int vkd3d_view_flags_from_d3d12_buffer_uav_flags(D3D12_BUFFER_UAV_FLAGS flags) -@@ -3089,7 +3095,6 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, +@@ -3089,7 +3368,6 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, { struct vkd3d_null_resources *null_resources = &device->null_resources; struct vkd3d_texture_view_desc vkd3d_desc; @@ -29867,7 +42543,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 
VkImage vk_image; if (!desc) -@@ -3104,15 +3109,9 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, +@@ -3104,15 +3382,9 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, if (!device->vk_info.EXT_robustness2) WARN("Creating NULL buffer UAV %#x.\n", desc->Format); @@ -29885,7 +42561,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 return; case D3D12_UAV_DIMENSION_TEXTURE2D: -@@ -3150,13 +3149,7 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, +@@ -3150,13 +3422,7 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_A; vkd3d_desc.allowed_swizzle = false; @@ -29900,7 +42576,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 } static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, -@@ -3179,16 +3172,11 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ +@@ -3179,16 +3445,11 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ } flags = vkd3d_view_flags_from_d3d12_buffer_uav_flags(desc->u.Buffer.Flags); @@ -29918,7 +42594,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 if (counter_resource) { const struct vkd3d_format *format; -@@ -3198,13 +3186,16 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ +@@ -3198,13 +3459,16 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format, @@ -29938,7 +42614,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 } static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, -@@ -3212,7 +3203,6 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, +@@ -3212,7 +3476,6 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) { struct vkd3d_texture_view_desc 
vkd3d_desc; @@ -29946,7 +42622,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? desc->Format : 0)) return; -@@ -3227,6 +3217,9 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, +@@ -3227,6 +3490,9 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, { switch (desc->ViewDimension) { @@ -29956,7 +42632,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 case D3D12_UAV_DIMENSION_TEXTURE2D: vkd3d_desc.miplevel_idx = desc->u.Texture2D.MipSlice; if (desc->u.Texture2D.PlaneSlice) -@@ -3257,13 +3250,8 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, +@@ -3257,13 +3523,8 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, } } @@ -29972,7 +42648,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 } void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, -@@ -3291,12 +3279,26 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d +@@ -3291,12 +3552,26 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d } bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, @@ -30000,7 +42676,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address); assert(d3d12_resource_is_buffer(resource)); return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format, -@@ -3412,21 +3414,21 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, +@@ -3412,21 +3687,21 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, FIXME("Ignoring border color {%.8e, %.8e, %.8e, %.8e}.\n", desc->BorderColor[0], desc->BorderColor[1], desc->BorderColor[2], desc->BorderColor[3]); @@ -30029,7 +42705,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 } HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, -@@ -3448,7 +3450,7 @@ HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, +@@ -3448,7 
+3723,7 @@ HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, /* RTVs */ static void d3d12_rtv_desc_destroy(struct d3d12_rtv_desc *rtv, struct d3d12_device *device) { @@ -30038,7 +42714,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 return; vkd3d_view_decref(rtv->view, device); -@@ -3527,10 +3529,9 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev +@@ -3527,10 +3802,9 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev assert(d3d12_resource_is_texture(resource)); @@ -30050,7 +42726,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 rtv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); rtv_desc->format = vkd3d_desc.format; rtv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); -@@ -3543,7 +3544,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev +@@ -3543,7 +3817,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev /* DSVs */ static void d3d12_dsv_desc_destroy(struct d3d12_dsv_desc *dsv, struct d3d12_device *device) { @@ -30059,7 +42735,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 return; vkd3d_view_decref(dsv->view, device); -@@ -3612,10 +3613,9 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev +@@ -3612,10 +3886,9 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev assert(d3d12_resource_is_texture(resource)); @@ -30071,7 +42747,24 @@ index 8c050cfeb32..ea7b6859cc1 100644 dsv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); dsv_desc->format = vkd3d_desc.format; dsv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); -@@ -3883,7 +3883,6 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript +@@ -3810,7 +4083,15 @@ static D3D12_GPU_DESCRIPTOR_HANDLE * STDMETHODCALLTYPE d3d12_descriptor_heap_Get + + 
TRACE("iface %p, descriptor %p.\n", iface, descriptor); + +- descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; ++ if (heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) ++ { ++ descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; ++ } ++ else ++ { ++ WARN("Heap %p is not shader-visible.\n", iface); ++ descriptor->ptr = 0; ++ } + + return descriptor; + } +@@ -3883,7 +4164,6 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; VkDescriptorSetAllocateInfo set_desc; @@ -30079,7 +42772,7 @@ index 8c050cfeb32..ea7b6859cc1 100644 VkResult vr; set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; -@@ -3897,8 +3896,7 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript +@@ -3897,8 +4177,7 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript set_size.pDescriptorCounts = &variable_binding_size; if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0) { @@ -30089,15 +42782,18 @@ index 8c050cfeb32..ea7b6859cc1 100644 return S_OK; } -@@ -3914,7 +3912,6 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri +@@ -3914,9 +4193,8 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets)); - vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); - if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV +- if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ++ if (!descriptor_heap->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) -@@ -3925,53 +3922,6 @@ static 
HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri + return S_OK; + +@@ -3925,53 +4203,6 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set) { @@ -30151,15 +42847,17 @@ index 8c050cfeb32..ea7b6859cc1 100644 if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, set))) return hr; -@@ -3995,6 +3945,7 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript +@@ -3994,7 +4225,9 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript + if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store))) return hr; ++ descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); + vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); d3d12_device_add_ref(descriptor_heap->device = device); -@@ -4047,7 +3998,9 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, +@@ -4047,7 +4280,9 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, { memset(&dst[i].s, 0, sizeof(dst[i].s)); dst[i].index = i; @@ -30170,29 +42868,104 @@ index 8c050cfeb32..ea7b6859cc1 100644 else { diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index c964ea8fe3a..5e46b467252 100644 +index c964ea8fe3a..0b92cffcde3 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -1958,7 +1958,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device, +@@ -20,6 +20,7 @@ + + #include "vkd3d_private.h" + #include "vkd3d_shaders.h" ++#include "vkd3d_shader_utils.h" + + /* ID3D12RootSignature */ + static inline struct d3d12_root_signature *impl_from_ID3D12RootSignature(ID3D12RootSignature *iface) +@@ -374,8 
+375,8 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig + + if (unbounded && range->OffsetInDescriptorsFromTableStart == D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + { +- WARN("An unbounded range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " +- "another unbounded range.\n"); ++ WARN("A range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " ++ "an unbounded range.\n"); + return E_INVALIDARG; + } + +@@ -1958,7 +1959,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device, const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7}, -+ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_8}, ++ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_9}, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0}, }; -@@ -2011,7 +2011,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER +@@ -1978,14 +1979,14 @@ static HRESULT create_shader_stage(struct d3d12_device *device, + compile_info.next = shader_interface; + compile_info.source.code = code->pShaderBytecode; + compile_info.source.size = code->BytecodeLength; +- compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; + compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; + compile_info.options = options; + compile_info.option_count = ARRAY_SIZE(options); + compile_info.log_level = VKD3D_SHADER_LOG_NONE; + compile_info.source_name = NULL; + +- if ((ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) ++ if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0 ++ || (ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) + { + WARN("Failed to compile shader, vkd3d result %d.\n", ret); + return hresult_from_vkd3d_result(ret); +@@ -2008,10 +2009,11 @@ static int 
vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER + struct vkd3d_shader_scan_descriptor_info *descriptor_info) + { + struct vkd3d_shader_compile_info compile_info; ++ enum vkd3d_result ret; const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7}, -+ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_8}, ++ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_9}, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, }; +@@ -2019,13 +2021,15 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER + compile_info.next = descriptor_info; + compile_info.source.code = code->pShaderBytecode; + compile_info.source.size = code->BytecodeLength; +- compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; + compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; + compile_info.options = options; + compile_info.option_count = ARRAY_SIZE(options); + compile_info.log_level = VKD3D_SHADER_LOG_NONE; + compile_info.source_name = NULL; + ++ if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0) ++ return ret; ++ + return vkd3d_shader_scan(&compile_info, NULL); + } + +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c +index 88301fbb313..159560afd8e 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c +@@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, + + if (!device) + { +- ID3D12Device_Release(&object->ID3D12Device_iface); ++ ID3D12Device_Release(&object->ID3D12Device1_iface); + return S_FALSE; + } + +- return return_interface(&object->ID3D12Device_iface, &IID_ID3D12Device, iid, device); ++ return return_interface(&object->ID3D12Device1_iface, &IID_ID3D12Device, iid, device); + } + + /* ID3D12RootSignatureDeserializer */ diff --git 
a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index 77b795d6278..b0150754434 100644 +index 77b795d6278..a18287b4cd4 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -31,8 +31,8 @@ @@ -30223,10 +42996,29 @@ index 77b795d6278..b0150754434 100644 #define VKD3D_DESCRIPTOR_MAGIC_DSV VKD3D_MAKE_TAG('D', 'S', 'V', 0) #define VKD3D_DESCRIPTOR_MAGIC_RTV VKD3D_MAKE_TAG('R', 'T', 'V', 0) -@@ -252,6 +250,31 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) +@@ -149,9 +147,12 @@ struct vkd3d_vulkan_info + unsigned int max_vertex_attrib_divisor; + + VkPhysicalDeviceLimits device_limits; +- VkPhysicalDeviceSparseProperties sparse_properties; + struct vkd3d_device_descriptor_limits descriptor_limits; + ++ VkPhysicalDeviceSparseProperties sparse_properties; ++ bool sparse_binding; ++ bool sparse_residency_3d; ++ + VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; + + unsigned int shader_extension_count; +@@ -252,6 +253,36 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) { } ++static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) ++{ ++ return InterlockedIncrement((LONG volatile *)x); ++} ++ +static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) +{ + return InterlockedDecrement((LONG volatile *)x); @@ -30255,7 +43047,7 @@ index 77b795d6278..b0150754434 100644 #else /* _WIN32 */ #include -@@ -354,6 +377,63 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) +@@ -354,6 +385,72 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) ERR("Could not destroy the condition variable, error %d.\n", ret); } @@ -30266,6 +43058,15 @@ index 77b795d6278..b0150754434 100644 +} +# else +# error "vkd3d_atomic_decrement() not implemented for this platform" ++# endif /* HAVE_SYNC_SUB_AND_FETCH */ ++ ++# if HAVE_SYNC_ADD_AND_FETCH ++static inline unsigned int 
vkd3d_atomic_increment(unsigned int volatile *x) ++{ ++ return __sync_add_and_fetch(x, 1); ++} ++# else ++# error "vkd3d_atomic_increment() not implemented for this platform" +# endif /* HAVE_SYNC_ADD_AND_FETCH */ + +# if HAVE_SYNC_BOOL_COMPARE_AND_SWAP @@ -30319,7 +43120,21 @@ index 77b795d6278..b0150754434 100644 #endif /* _WIN32 */ HRESULT vkd3d_create_thread(struct vkd3d_instance *instance, -@@ -563,6 +643,7 @@ struct d3d12_heap +@@ -519,10 +616,12 @@ struct vkd3d_signaled_semaphore + /* ID3D12Fence */ + struct d3d12_fence + { +- ID3D12Fence ID3D12Fence_iface; ++ ID3D12Fence1 ID3D12Fence1_iface; + LONG internal_refcount; + LONG refcount; + ++ D3D12_FENCE_FLAGS flags; ++ + uint64_t value; + uint64_t max_pending_value; + struct vkd3d_mutex mutex; +@@ -563,6 +662,7 @@ struct d3d12_heap { ID3D12Heap ID3D12Heap_iface; LONG refcount; @@ -30327,7 +43142,66 @@ index 77b795d6278..b0150754434 100644 bool is_private; D3D12_HEAP_DESC desc; -@@ -661,11 +742,9 @@ enum vkd3d_view_type +@@ -589,6 +689,30 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface); + #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 + #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010 + ++struct vkd3d_tiled_region_extent ++{ ++ unsigned int width; ++ unsigned int height; ++ unsigned int depth; ++}; ++ ++struct vkd3d_subresource_tile_info ++{ ++ unsigned int offset; ++ unsigned int count; ++ struct vkd3d_tiled_region_extent extent; ++}; ++ ++struct d3d12_resource_tile_info ++{ ++ VkExtent3D tile_extent; ++ unsigned int total_count; ++ unsigned int standard_mip_count; ++ unsigned int packed_mip_tile_count; ++ unsigned int subresource_count; ++ struct vkd3d_subresource_tile_info *subresources; ++}; ++ + /* ID3D12Resource */ + struct d3d12_resource + { +@@ -617,9 +741,16 @@ struct d3d12_resource + + struct d3d12_device *device; + ++ struct d3d12_resource_tile_info tiles; ++ + struct vkd3d_private_store private_store; + }; + ++static inline struct d3d12_resource 
*impl_from_ID3D12Resource(ID3D12Resource *iface) ++{ ++ return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); ++} ++ + static inline bool d3d12_resource_is_buffer(const struct d3d12_resource *resource) + { + return resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER; +@@ -632,6 +763,10 @@ static inline bool d3d12_resource_is_texture(const struct d3d12_resource *resour + + bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource); + HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d12_device *device); ++void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, ++ UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, ++ UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, ++ D3D12_SUBRESOURCE_TILING *sub_resource_tilings); + + HRESULT d3d12_committed_resource_create(struct d3d12_device *device, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, +@@ -661,11 +796,9 @@ enum vkd3d_view_type VKD3D_VIEW_TYPE_SAMPLER, }; @@ -30340,7 +43214,7 @@ index 77b795d6278..b0150754434 100644 union { VkBufferView vk_buffer_view; -@@ -691,9 +770,6 @@ struct vkd3d_view +@@ -691,9 +824,6 @@ struct vkd3d_view } info; }; @@ -30350,7 +43224,7 @@ index 77b795d6278..b0150754434 100644 struct vkd3d_texture_view_desc { VkImageViewType view_type; -@@ -707,32 +783,88 @@ struct vkd3d_texture_view_desc +@@ -707,32 +837,89 @@ struct vkd3d_texture_view_desc bool allowed_swizzle; }; @@ -30435,8 +43309,9 @@ index 77b795d6278..b0150754434 100644 + { + do + { -+ view = src->s.u.object; -+ } while (view && !vkd3d_view_incref(view)); ++ if (!(view = src->s.u.object)) ++ return NULL; ++ } while (!vkd3d_view_incref(view)); + + /* Check if the object is still in src to handle the case where it was + * already freed and reused elsewhere when the refcount was incremented. 
*/ @@ -30450,7 +43325,19 @@ index 77b795d6278..b0150754434 100644 static inline struct d3d12_desc *d3d12_desc_from_cpu_handle(D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle) { return (struct d3d12_desc *)cpu_handle.ptr; -@@ -761,13 +893,12 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device * +@@ -748,7 +935,10 @@ static inline void d3d12_desc_copy_raw(struct d3d12_desc *dst, const struct d3d1 + dst->s = src->s; + } + +-void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); ++struct d3d12_descriptor_heap; ++ ++void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, ++ struct d3d12_device *device); + void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, + struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc); + void d3d12_desc_create_srv(struct d3d12_desc *descriptor, +@@ -761,13 +951,12 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device * void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, @@ -30465,7 +43352,7 @@ index 77b795d6278..b0150754434 100644 VkSampleCountFlagBits sample_count; const struct vkd3d_format *format; uint64_t width; -@@ -787,7 +918,6 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev +@@ -787,7 +976,6 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev struct d3d12_dsv_desc { @@ -30473,7 +43360,7 @@ index 77b795d6278..b0150754434 100644 VkSampleCountFlagBits sample_count; const struct vkd3d_format *format; uint64_t width; -@@ -837,15 +967,10 @@ struct vkd3d_vk_descriptor_heap_layout +@@ -837,15 +1025,10 @@ struct vkd3d_vk_descriptor_heap_layout VkDescriptorSetLayout vk_set_layout; }; @@ -30490,7 +43377,15 @@ index 77b795d6278..b0150754434 100644 }; /* ID3D12DescriptorHeap */ 
-@@ -865,9 +990,13 @@ struct d3d12_descriptor_heap +@@ -858,6 +1041,7 @@ struct d3d12_descriptor_heap + D3D12_DESCRIPTOR_HEAP_DESC desc; + + struct d3d12_device *device; ++ bool use_vk_heaps; + + struct vkd3d_private_store private_store; + +@@ -865,9 +1049,13 @@ struct d3d12_descriptor_heap struct d3d12_descriptor_heap_vk_set vk_descriptor_sets[VKD3D_SET_INDEX_COUNT]; struct vkd3d_mutex vk_sets_mutex; @@ -30505,7 +43400,7 @@ index 77b795d6278..b0150754434 100644 static inline struct d3d12_descriptor_heap *d3d12_desc_get_descriptor_heap(const struct d3d12_desc *descriptor) { return CONTAINING_RECORD(descriptor - descriptor->index, struct d3d12_descriptor_heap, descriptors); -@@ -882,22 +1011,6 @@ static inline unsigned int d3d12_desc_heap_range_size(const struct d3d12_desc *d +@@ -882,22 +1070,6 @@ static inline unsigned int d3d12_desc_heap_range_size(const struct d3d12_desc *d HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap); @@ -30528,7 +43423,16 @@ index 77b795d6278..b0150754434 100644 /* ID3D12QueryHeap */ struct d3d12_query_heap { -@@ -1295,6 +1408,8 @@ struct d3d12_command_list +@@ -1254,7 +1426,7 @@ enum vkd3d_pipeline_bind_point + /* ID3D12CommandList */ + struct d3d12_command_list + { +- ID3D12GraphicsCommandList2 ID3D12GraphicsCommandList2_iface; ++ ID3D12GraphicsCommandList3 ID3D12GraphicsCommandList3_iface; + LONG refcount; + + D3D12_COMMAND_LIST_TYPE type; +@@ -1295,6 +1467,8 @@ struct d3d12_command_list VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT]; void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point); @@ -30537,20 +43441,101 @@ index 77b795d6278..b0150754434 100644 struct vkd3d_private_store private_store; }; -@@ -1485,6 +1600,12 @@ struct vkd3d_uav_clear_state +@@ -1339,6 +1513,8 @@ enum vkd3d_cs_op + VKD3D_CS_OP_WAIT, + VKD3D_CS_OP_SIGNAL, + VKD3D_CS_OP_EXECUTE, ++ 
VKD3D_CS_OP_UPDATE_MAPPINGS, ++ VKD3D_CS_OP_COPY_MAPPINGS, + }; + + struct vkd3d_cs_wait +@@ -1359,6 +1535,30 @@ struct vkd3d_cs_execute + unsigned int buffer_count; + }; + ++struct vkd3d_cs_update_mappings ++{ ++ struct d3d12_resource *resource; ++ struct d3d12_heap *heap; ++ D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates; ++ D3D12_TILE_REGION_SIZE *region_sizes; ++ D3D12_TILE_RANGE_FLAGS *range_flags; ++ UINT *heap_range_offsets; ++ UINT *range_tile_counts; ++ UINT region_count; ++ UINT range_count; ++ D3D12_TILE_MAPPING_FLAGS flags; ++}; ++ ++struct vkd3d_cs_copy_mappings ++{ ++ struct d3d12_resource *dst_resource; ++ struct d3d12_resource *src_resource; ++ D3D12_TILED_RESOURCE_COORDINATE dst_region_start_coordinate; ++ D3D12_TILED_RESOURCE_COORDINATE src_region_start_coordinate; ++ D3D12_TILE_REGION_SIZE region_size; ++ D3D12_TILE_MAPPING_FLAGS flags; ++}; ++ + struct vkd3d_cs_op_data + { + enum vkd3d_cs_op opcode; +@@ -1367,6 +1567,8 @@ struct vkd3d_cs_op_data + struct vkd3d_cs_wait wait; + struct vkd3d_cs_signal signal; + struct vkd3d_cs_execute execute; ++ struct vkd3d_cs_update_mappings update_mappings; ++ struct vkd3d_cs_copy_mappings copy_mappings; + } u; + }; + +@@ -1404,6 +1606,8 @@ struct d3d12_command_queue + * set, aux_op_queue.count must be zero. 
*/ + struct d3d12_command_queue_op_array aux_op_queue; + ++ bool supports_sparse_binding; ++ + struct vkd3d_private_store private_store; + }; + +@@ -1415,6 +1619,7 @@ struct d3d12_command_signature + { + ID3D12CommandSignature ID3D12CommandSignature_iface; + LONG refcount; ++ unsigned int internal_refcount; + + D3D12_COMMAND_SIGNATURE_DESC desc; + +@@ -1485,12 +1690,26 @@ struct vkd3d_uav_clear_state HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); ++struct desc_object_cache_head ++{ ++ void *head; ++ unsigned int spinlock; ++}; ++ +struct vkd3d_desc_object_cache +{ -+ void * volatile head; ++ struct desc_object_cache_head heads[16]; ++ unsigned int next_index; ++ unsigned int free_count; + size_t size; +}; + #define VKD3D_DESCRIPTOR_POOL_COUNT 6 /* ID3D12Device */ -@@ -1502,7 +1623,8 @@ struct d3d12_device + struct d3d12_device + { +- ID3D12Device ID3D12Device_iface; ++ ID3D12Device1 ID3D12Device1_iface; + LONG refcount; + + VkDevice vk_device; +@@ -1502,7 +1721,8 @@ struct d3d12_device struct vkd3d_gpu_va_allocator gpu_va_allocator; struct vkd3d_mutex mutex; @@ -30560,7 +43545,7 @@ index 77b795d6278..b0150754434 100644 struct vkd3d_render_pass_cache render_pass_cache; VkPipelineCache vk_pipeline_cache; -@@ -1544,6 +1666,7 @@ struct d3d12_device +@@ -1544,6 +1764,7 @@ struct d3d12_device struct vkd3d_uav_clear_state uav_clear_state; VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; @@ -30568,10 +43553,37 @@ index 77b795d6278..b0150754434 100644 struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; bool use_vk_heaps; }; -@@ -1577,19 +1700,6 @@ static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(str - return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); +@@ -1554,40 +1775,27 @@ struct 
vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 + bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); + void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, + const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); +-struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface); ++struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface); + + static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) + { +- return ID3D12Device_QueryInterface(&device->ID3D12Device_iface, iid, object); ++ return ID3D12Device1_QueryInterface(&device->ID3D12Device1_iface, iid, object); } + static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) + { +- return ID3D12Device_AddRef(&device->ID3D12Device_iface); ++ return ID3D12Device1_AddRef(&device->ID3D12Device1_iface); + } + + static inline ULONG d3d12_device_release(struct d3d12_device *device) + { +- return ID3D12Device_Release(&device->ID3D12Device_iface); ++ return ID3D12Device1_Release(&device->ID3D12Device1_iface); + } + + static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) + { +- return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); +-} +- -static inline struct vkd3d_mutex *d3d12_device_get_descriptor_mutex(struct d3d12_device *device, - const struct d3d12_desc *descriptor) -{ @@ -30583,11 +43595,10 @@ index 77b795d6278..b0150754434 100644 - idx ^= idx >> 3; - - return &device->desc_mutex[idx & (ARRAY_SIZE(device->desc_mutex) - 1)]; --} -- ++ return ID3D12Device1_GetDescriptorHandleIncrementSize(&device->ID3D12Device1_iface, descriptor_type); + } + /* utils */ - enum vkd3d_format_type - { -- 2.40.1 diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-46c7f65be8337a108a04a616ccd0c8a7732.patch 
b/patches/vkd3d-latest/0002-Updated-vkd3d-to-46c7f65be8337a108a04a616ccd0c8a7732.patch deleted file mode 100644 index bfb087bd..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-46c7f65be8337a108a04a616ccd0c8a7732.patch +++ /dev/null @@ -1,19526 +0,0 @@ -From 8dab6cb1d8d984bee47df2da8288e01289fd1379 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 20 Sep 2023 06:49:52 +1000 -Subject: [PATCH] Updated vkd3d to 46c7f65be8337a108a04a616ccd0c8a773216f3f. - ---- - libs/vkd3d/Makefile.in | 1 + - libs/vkd3d/include/private/vkd3d_common.h | 22 + - .../include/private/vkd3d_shader_utils.h | 67 + - libs/vkd3d/include/vkd3d.h | 35 + - libs/vkd3d/include/vkd3d_shader.h | 291 +- - libs/vkd3d/include/vkd3d_windows.h | 5 + - libs/vkd3d/libs/vkd3d-common/debug.c | 17 +- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 50 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 430 ++- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 17 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 2968 +++++++++++++++++ - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 289 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 68 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1969 ++++++----- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 768 +++-- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 525 ++- - libs/vkd3d/libs/vkd3d-shader/ir.c | 232 +- - libs/vkd3d/libs/vkd3d-shader/preproc.l | 140 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 457 ++- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 1385 +++++--- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 440 ++- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 128 +- - libs/vkd3d/libs/vkd3d/command.c | 674 ++-- - libs/vkd3d/libs/vkd3d/device.c | 245 +- - libs/vkd3d/libs/vkd3d/resource.c | 342 +- - libs/vkd3d/libs/vkd3d/state.c | 14 +- - libs/vkd3d/libs/vkd3d/vkd3d_main.c | 4 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 124 +- - 28 files changed, 9257 insertions(+), 2450 deletions(-) - create mode 100644 libs/vkd3d/include/private/vkd3d_shader_utils.h - create mode 100644 
libs/vkd3d/libs/vkd3d-shader/dxil.c - -diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in -index 1ba0e9f71e1..f647af11d07 100644 ---- a/libs/vkd3d/Makefile.in -+++ b/libs/vkd3d/Makefile.in -@@ -17,6 +17,7 @@ SOURCES = \ - libs/vkd3d-shader/d3d_asm.c \ - libs/vkd3d-shader/d3dbc.c \ - libs/vkd3d-shader/dxbc.c \ -+ libs/vkd3d-shader/dxil.c \ - libs/vkd3d-shader/glsl.c \ - libs/vkd3d-shader/hlsl.c \ - libs/vkd3d-shader/hlsl.l \ -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index 1ac23b4a085..f7d98f327f1 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -20,6 +20,7 @@ - #define __VKD3D_COMMON_H - - #include "config.h" -+#define WIN32_LEAN_AND_MEAN - #include "windows.h" - #include "vkd3d_types.h" - -@@ -28,6 +29,7 @@ - #include - #include - #include -+#include - - #ifdef _MSC_VER - #include -@@ -171,6 +173,11 @@ static inline bool vkd3d_bound_range(size_t start, size_t count, size_t limit) - #endif - } - -+static inline bool vkd3d_object_range_overflow(size_t start, size_t count, size_t size) -+{ -+ return (~(size_t)0 - start) / size < count; -+} -+ - static inline uint16_t vkd3d_make_u16(uint8_t low, uint8_t high) - { - return low | ((uint16_t)high << 8); -@@ -186,6 +193,21 @@ static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) - return (x > y) - (x < y); - } - -+static inline bool bitmap_clear(uint32_t *map, unsigned int idx) -+{ -+ return map[idx >> 5] &= ~(1u << (idx & 0x1f)); -+} -+ -+static inline bool bitmap_set(uint32_t *map, unsigned int idx) -+{ -+ return map[idx >> 5] |= (1u << (idx & 0x1f)); -+} -+ -+static inline bool bitmap_is_set(const uint32_t *map, unsigned int idx) -+{ -+ return map[idx >> 5] & (1u << (idx & 0x1f)); -+} -+ - static inline int ascii_isupper(int c) - { - return 'A' <= c && c <= 'Z'; -diff --git a/libs/vkd3d/include/private/vkd3d_shader_utils.h b/libs/vkd3d/include/private/vkd3d_shader_utils.h -new file mode 
100644 -index 00000000000..c9f8001e590 ---- /dev/null -+++ b/libs/vkd3d/include/private/vkd3d_shader_utils.h -@@ -0,0 +1,67 @@ -+/* -+ * Copyright 2023 Conor McCarthy for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#ifndef __VKD3D_SHADER_UTILS_H -+#define __VKD3D_SHADER_UTILS_H -+ -+#include "vkd3d_shader.h" -+ -+#define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') -+#define TAG_SHDR VKD3D_MAKE_TAG('S', 'H', 'D', 'R') -+#define TAG_SHEX VKD3D_MAKE_TAG('S', 'H', 'E', 'X') -+ -+static inline enum vkd3d_result vkd3d_shader_parse_dxbc_source_type(const struct vkd3d_shader_code *dxbc, -+ enum vkd3d_shader_source_type *type, char **messages) -+{ -+ struct vkd3d_shader_dxbc_desc desc; -+ enum vkd3d_result ret; -+ unsigned int i; -+ -+ *type = VKD3D_SHADER_SOURCE_NONE; -+ -+ if ((ret = vkd3d_shader_parse_dxbc(dxbc, 0, &desc, messages)) < 0) -+ return ret; -+ -+ for (i = 0; i < desc.section_count; ++i) -+ { -+ uint32_t tag = desc.sections[i].tag; -+ if (tag == TAG_SHDR || tag == TAG_SHEX) -+ { -+ *type = VKD3D_SHADER_SOURCE_DXBC_TPF; -+#ifndef VKD3D_SHADER_UNSUPPORTED_DXIL -+ break; -+#else -+ } -+ else if (tag == TAG_DXIL) -+ { -+ *type = VKD3D_SHADER_SOURCE_DXBC_DXIL; -+ /* Default to DXIL if both are present. 
*/ -+ break; -+#endif -+ } -+ } -+ -+ vkd3d_shader_free_dxbc(&desc); -+ -+ if (*type == VKD3D_SHADER_SOURCE_NONE) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ return VKD3D_OK; -+} -+ -+#endif /* __VKD3D_SHADER_UTILS_H */ -diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h -index 72ed3ced671..2ccda47248a 100644 ---- a/libs/vkd3d/include/vkd3d.h -+++ b/libs/vkd3d/include/vkd3d.h -@@ -207,7 +207,42 @@ VKD3D_API VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device); - VKD3D_API struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device); - - VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue); -+ -+/** -+ * Acquire the Vulkan queue backing a command queue. -+ * -+ * While a queue is acquired by the client, it is locked so that -+ * neither the vkd3d library nor other threads can submit work to -+ * it. For that reason it should be released as soon as possible with -+ * vkd3d_release_vk_queue(). The lock is not reentrant, so the same -+ * queue must not be acquired more than once by the same thread. -+ * -+ * Work submitted through the Direct3D 12 API exposed by vkd3d is not -+ * always immediately submitted to the Vulkan queue; sometimes it is -+ * kept in another internal queue, which might not necessarily be -+ * empty at the time vkd3d_acquire_vk_queue() is called. For this -+ * reason, work submitted directly to the Vulkan queue might appear to -+ * the Vulkan driver as being submitted before other work submitted -+ * though the Direct3D 12 API. If this is not desired, it is -+ * recommended to synchronize work submission using an ID3D12Fence -+ * object, by submitting to the queue a signal operation after all the -+ * Direct3D 12 work is submitted and waiting for it before calling -+ * vkd3d_acquire_vk_queue(). -+ * -+ * \since 1.0 -+ */ - VKD3D_API VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue); -+ -+/** -+ * Release the Vulkan queue backing a command queue. 
-+ * -+ * This must be paired to an earlier corresponding -+ * vkd3d_acquire_vk_queue(). After this function is called, the Vulkan -+ * queue returned by vkd3d_acquire_vk_queue() must not be used any -+ * more. -+ * -+ * \since 1.0 -+ */ - VKD3D_API void vkd3d_release_vk_queue(ID3D12CommandQueue *queue); - - VKD3D_API HRESULT vkd3d_create_image_resource(ID3D12Device *device, -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 274241546ea..fc2c1257d16 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -85,6 +85,16 @@ enum vkd3d_shader_structure_type - * \since 1.3 - */ - VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO, -+ /** -+ * The structure is a vkd3d_shader_scan_signature_info structure. -+ * \since 1.9 -+ */ -+ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO, -+ /** -+ * The structure is a vkd3d_shader_next_stage_info structure. -+ * \since 1.9 -+ */ -+ VKD3D_SHADER_STRUCTURE_TYPE_NEXT_STAGE_INFO, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), - }; -@@ -134,6 +144,15 @@ enum vkd3d_shader_compile_option_formatting_flags - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), - }; - -+/** Determines how matrices are stored. \since 1.9 */ -+enum vkd3d_shader_compile_option_pack_matrix_order -+{ -+ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR = 0x00000001, -+ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR = 0x00000002, -+ -+ VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER), -+}; -+ - enum vkd3d_shader_compile_option_name - { - /** -@@ -164,6 +183,15 @@ enum vkd3d_shader_compile_option_name - * \since 1.7 - */ - VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE = 0x00000006, -+ /** -+ * This option specifies default matrix packing order for HLSL sources. -+ * Explicit variable modifiers or pragmas will take precedence. -+ * -+ * \a value is a member of enum vkd3d_shader_compile_option_pack_matrix_order. 
-+ * -+ * \since 1.9 -+ */ -+ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER = 0x00000007, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), - }; -@@ -327,6 +355,25 @@ struct vkd3d_shader_parameter - } u; - }; - -+/** -+ * Symbolic register indices for mapping uniform constant register sets in -+ * legacy Direct3D bytecode to constant buffer views in the target environment. -+ * -+ * Members of this enumeration are used in -+ * \ref vkd3d_shader_resource_binding.register_index. -+ * -+ * \since 1.9 -+ */ -+enum vkd3d_shader_d3dbc_constant_register -+{ -+ /** The float constant register set, c# in Direct3D assembly. */ -+ VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER = 0x0, -+ /** The integer constant register set, i# in Direct3D assembly. */ -+ VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER = 0x1, -+ /** The boolean constant register set, b# in Direct3D assembly. */ -+ VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER = 0x2, -+}; -+ - /** - * Describes the mapping of a single resource or resource array to its binding - * point in the target environment. -@@ -351,7 +398,14 @@ struct vkd3d_shader_resource_binding - * support multiple register spaces, this parameter must be set to 0. - */ - unsigned int register_space; -- /** Register index of the DXBC resource. */ -+ /** -+ * Register index of the Direct3D resource. -+ * -+ * For legacy Direct3D shaders, vkd3d-shader maps each constant register -+ * set to a single constant buffer view. This parameter names the register -+ * set to map, and must be a member of -+ * enum vkd3d_shader_d3dbc_constant_register. -+ */ - unsigned int register_index; - /** Shader stage(s) to which the resource is visible. */ - enum vkd3d_shader_visibility shader_visibility; -@@ -611,6 +665,11 @@ enum vkd3d_shader_source_type - * model 1, 2, and 3 shaders. \since 1.3 - */ - VKD3D_SHADER_SOURCE_D3D_BYTECODE, -+ /** -+ * A 'DirectX Intermediate Language' shader embedded in a DXBC container. 
This is -+ * the format used for Direct3D shader model 6 shaders. \since 1.9 -+ */ -+ VKD3D_SHADER_SOURCE_DXBC_DXIL, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), - }; -@@ -620,7 +679,7 @@ enum vkd3d_shader_target_type - { - /** - * The shader has no type or is to be ignored. This is not a valid value -- * for vkd3d_shader_compile() or vkd3d_shader_scan(). -+ * for vkd3d_shader_compile(). - */ - VKD3D_SHADER_TARGET_NONE, - /** -@@ -1281,6 +1340,8 @@ enum vkd3d_shader_descriptor_info_flag - /** The descriptor is a UAV resource, on which the shader performs - * atomic ops. \since 1.6 */ - VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS = 0x00000008, -+ /** The descriptor is a raw (byte-addressed) buffer. \since 1.9 */ -+ VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER = 0x00000010, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_DESCRIPTOR_INFO_FLAG), - }; -@@ -1320,6 +1381,20 @@ struct vkd3d_shader_descriptor_info - * A chained structure enumerating the descriptors declared by a shader. - * - * This structure extends vkd3d_shader_compile_info. -+ * -+ * When scanning a legacy Direct3D shader, vkd3d-shader enumerates each -+ * constant register set used by the shader as a single constant buffer -+ * descriptor, as follows: -+ * - The \ref vkd3d_shader_descriptor_info.type field is set to -+ * VKD3D_SHADER_DESCRIPTOR_TYPE_CBV. -+ * - The \ref vkd3d_shader_descriptor_info.register_space field is set to zero. -+ * - The \ref vkd3d_shader_descriptor_info.register_index field is set to a -+ * member of enum vkd3d_shader_d3dbc_constant_register denoting which set -+ * is used. -+ * - The \ref vkd3d_shader_descriptor_info.count field is set to one. -+ * -+ * In summary, there may be up to three such descriptors, one for each register -+ * set used by the shader: float, integer, and boolean. 
- */ - struct vkd3d_shader_scan_descriptor_info - { -@@ -1389,6 +1464,8 @@ enum vkd3d_shader_sysval_semantic - VKD3D_SHADER_SV_TESS_FACTOR_TRIINT = 0x0e, - VKD3D_SHADER_SV_TESS_FACTOR_LINEDET = 0x0f, - VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN = 0x10, -+ /** Render target; SV_Target in Direct3D shader model 6 shaders. \since 1.9 */ -+ VKD3D_SHADER_SV_TARGET = 0x40, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SYSVAL_SEMANTIC), - }; -@@ -1551,6 +1628,134 @@ static inline uint32_t vkd3d_shader_create_swizzle(enum vkd3d_shader_swizzle_com - | ((w & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(3)); - } - -+/** -+ * A chained structure containing descriptions of shader inputs and outputs. -+ * -+ * This structure is currently implemented only for DXBC and legacy D3D bytecode -+ * source types. -+ * For DXBC shaders, the returned information is parsed directly from the -+ * signatures embedded in the DXBC shader. -+ * For legacy D3D shaders, the returned information is synthesized based on -+ * registers declared or used by shader instructions. -+ * For all other shader types, the structure is zeroed. -+ * -+ * All members (except for \ref type and \ref next) are output-only. -+ * -+ * This structure is passed to vkd3d_shader_scan() and extends -+ * vkd3d_shader_compile_info. -+ * -+ * Members of this structure are allocated by vkd3d-shader and should be freed -+ * with vkd3d_shader_free_scan_signature_info() when no longer needed. -+ * -+ * All signatures may contain pointers into the input shader, and should only -+ * be accessed while the input shader remains valid. -+ * -+ * Signature elements are synthesized from legacy Direct3D bytecode as follows: -+ * - The \ref vkd3d_shader_signature_element.semantic_name field is set to an -+ * uppercase string corresponding to the HLSL name for the usage, e.g. -+ * "POSITION", "BLENDWEIGHT", "COLOR", "PSIZE", etc. -+ * - The \ref vkd3d_shader_signature_element.semantic_index field is set to the -+ * usage index. 
-+ * - The \ref vkd3d_shader_signature_element.stream_index is always 0. -+ * -+ * Signature elements are synthesized for any input or output register declared -+ * or used in a legacy Direct3D bytecode shader, including the following: -+ * - Shader model 1 and 2 colour and texture coordinate registers. -+ * - The shader model 1 pixel shader output register. -+ * - Shader model 1 and 2 vertex shader output registers (position, fog, and -+ * point size). -+ * - Shader model 3 pixel shader system value input registers (pixel position -+ * and face). -+ * -+ * \since 1.9 -+ */ -+struct vkd3d_shader_scan_signature_info -+{ -+ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO. */ -+ enum vkd3d_shader_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ -+ const void *next; -+ -+ /** The shader input varyings. */ -+ struct vkd3d_shader_signature input; -+ -+ /** The shader output varyings. */ -+ struct vkd3d_shader_signature output; -+ -+ /** The shader patch constant varyings. */ -+ struct vkd3d_shader_signature patch_constant; -+}; -+ -+/** -+ * Describes the mapping of a output varying register in a shader stage, -+ * to an input varying register in the following shader stage. -+ * -+ * This structure is used in struct vkd3d_shader_next_stage_info. -+ */ -+struct vkd3d_shader_varying_map -+{ -+ /** -+ * The signature index (in the output signature) of the output varying. -+ * If greater than or equal to the number of elements in the output -+ * signature, signifies that the varying is consumed by the next stage but -+ * not written by this one. -+ */ -+ unsigned int output_signature_index; -+ /** The register index of the input varying to map this register to. */ -+ unsigned int input_register_index; -+ /** The mask consumed by the destination register. */ -+ unsigned int input_mask; -+}; -+ -+/** -+ * A chained structure which describes the next shader in the pipeline. 
-+ * -+ * This structure is optional, and should only be provided if there is in fact -+ * another shader in the pipeline. -+ * However, depending on the input and output formats, this structure may be -+ * necessary in order to generate shaders which correctly match each other. -+ * If the structure or its individual fields are not provided, vkd3d-shader -+ * will generate shaders which may be correct in isolation, but are not -+ * guaranteed to correctly match each other. -+ * -+ * This structure is passed to vkd3d_shader_compile() and extends -+ * vkd3d_shader_compile_info. -+ * -+ * This structure contains only input parameters. -+ * -+ * \since 1.9 -+ */ -+struct vkd3d_shader_next_stage_info -+{ -+ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_NEXT_STAGE_INFO. */ -+ enum vkd3d_shader_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ -+ const void *next; -+ -+ /** -+ * A mapping of output varyings in this shader stage to input varyings -+ * in the next shader stage. -+ * -+ * This mapping should include exactly one element for each varying -+ * consumed by the next shader stage. -+ * If this shader stage outputs a varying that is not consumed by the next -+ * shader stage, that varying should be absent from this array. -+ * -+ * If this field is absent, vkd3d-shader will map varyings from one stage -+ * to another based on their register index. -+ * For Direct3D shader model 3.0, such a default mapping will be incorrect -+ * unless the registers are allocated in the same order, and hence this -+ * field is necessary to correctly match inter-stage varyings. -+ * This mapping may also be necessary under other circumstances where the -+ * varying interface does not match exactly. -+ * -+ * This mapping may be constructed by vkd3d_shader_build_varying_map(). -+ */ -+ const struct vkd3d_shader_varying_map *varying_map; -+ /** The number of registers provided in \ref varying_map. 
*/ -+ unsigned int varying_count; -+}; -+ - #ifdef LIBVKD3D_SHADER_SOURCE - # define VKD3D_SHADER_API VKD3D_EXPORT - #else -@@ -1623,12 +1828,14 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported - * - * Depending on the source and target types, this function may support the - * following chained structures: -+ * - vkd3d_shader_hlsl_source_info - * - vkd3d_shader_interface_info -+ * - vkd3d_shader_next_stage_info - * - vkd3d_shader_scan_descriptor_info -+ * - vkd3d_shader_scan_signature_info - * - vkd3d_shader_spirv_domain_shader_target_info - * - vkd3d_shader_spirv_target_info - * - vkd3d_shader_transform_feedback_info -- * - vkd3d_shader_hlsl_source_info - * - * \param compile_info A chained structure containing compilation parameters. - * -@@ -1784,6 +1991,26 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver - * Parse shader source code or byte code, returning various types of requested - * information. - * -+ * The \a source_type member of \a compile_info must be set to the type of the -+ * shader. -+ * -+ * The \a target_type member may be set to VKD3D_SHADER_TARGET_NONE, in which -+ * case vkd3d_shader_scan() will return information about the shader in -+ * isolation. Alternatively, it may be set to a valid compilation target for the -+ * shader, in which case vkd3d_shader_scan() will return information that -+ * reflects the interface for a shader as it will be compiled to that target. -+ * In this case other chained structures may be appended to \a compile_info as -+ * they would be passed to vkd3d_shader_compile(), and interpreted accordingly, -+ * such as vkd3d_shader_spirv_target_info. -+ * -+ * (For a hypothetical example, suppose the source shader distinguishes float -+ * and integer texture data, but the target environment does not support integer -+ * textures. In this case vkd3d_shader_compile() might translate integer -+ * operations to float. 
Accordingly using VKD3D_SHADER_TARGET_NONE would -+ * accurately report whether the texture expects integer or float data, but -+ * using the relevant specific target type would report -+ * VKD3D_SHADER_RESOURCE_DATA_FLOAT.) -+ * - * Currently this function supports the following code types: - * - VKD3D_SHADER_SOURCE_DXBC_TPF - * -@@ -1791,6 +2018,7 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver - * \n - * The DXBC_TPF scanner supports the following chained structures: - * - vkd3d_shader_scan_descriptor_info -+ * - vkd3d_shader_scan_signature_info - * \n - * Although the \a compile_info parameter is read-only, chained structures - * passed to this function need not be, and may serve as output parameters, -@@ -1827,12 +2055,18 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_descriptor_info( - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info); - - /** -- * Read the input signature of a compiled shader, returning a structural -+ * Read the input signature of a compiled DXBC shader, returning a structural - * description which can be easily parsed by C code. - * - * This function parses a compiled shader. To parse a standalone root signature, - * use vkd3d_shader_parse_root_signature(). - * -+ * This function only parses DXBC shaders, and only retrieves the input -+ * signature. To retrieve signatures from other shader types, or other signature -+ * types, use vkd3d_shader_scan() and struct vkd3d_shader_scan_signature_info. -+ * This function returns the same input signature that is returned in -+ * struct vkd3d_shader_scan_signature_info. -+ * - * \param dxbc Compiled byte code, in DXBC format. 
- * - * \param signature Output location in which the parsed root signature will be -@@ -2022,6 +2256,48 @@ VKD3D_SHADER_API int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxb - VKD3D_SHADER_API int vkd3d_shader_serialize_dxbc(size_t section_count, - const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); - -+/** -+ * Free members of struct vkd3d_shader_scan_signature_info allocated by -+ * vkd3d_shader_scan(). -+ * -+ * This function may free members of vkd3d_shader_scan_signature_info, but -+ * does not free the structure itself. -+ * -+ * \param info Scan information to free. -+ * -+ * \since 1.9 -+ */ -+VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info); -+ -+/** -+ * Build a mapping of output varyings in a shader stage to input varyings in -+ * the following shader stage. -+ * -+ * This mapping should be used in struct vkd3d_shader_next_stage_info to -+ * compile the first shader. -+ * -+ * \param output_signature The output signature of the first shader. -+ * -+ * \param input_signature The input signature of the second shader. -+ * -+ * \param count On output, contains the number of entries written into -+ * \ref varyings. -+ * -+ * \param varyings Pointer to an output array of varyings. -+ * This must point to space for N varyings, where N is the number of elements -+ * in the input signature. -+ * -+ * \remark Valid legacy Direct3D pixel shaders have at most 12 varying inputs: -+ * 10 inter-stage varyings, face, and position. -+ * Therefore, in practice, it is safe to call this function with a -+ * pre-allocated array with a fixed size of 12. 
-+ * -+ * \since 1.9 -+ */ -+VKD3D_SHADER_API void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, -+ const struct vkd3d_shader_signature *input_signature, -+ unsigned int *count, struct vkd3d_shader_varying_map *varyings); -+ - #endif /* VKD3D_SHADER_NO_PROTOTYPES */ - - /** Type of vkd3d_shader_get_version(). */ -@@ -2087,6 +2363,13 @@ typedef int (*PFN_vkd3d_shader_parse_dxbc)(const struct vkd3d_shader_code *dxbc, - typedef int (*PFN_vkd3d_shader_serialize_dxbc)(size_t section_count, - const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); - -+/** Type of vkd3d_shader_build_varying_map(). \since 1.9 */ -+typedef void (*PFN_vkd3d_shader_build_varying_map)(const struct vkd3d_shader_signature *output_signature, -+ const struct vkd3d_shader_signature *input_signature, -+ unsigned int *count, struct vkd3d_shader_varying_map *varyings); -+/** Type of vkd3d_shader_free_scan_signature_info(). \since 1.9 */ -+typedef void (*PFN_vkd3d_shader_free_scan_signature_info)(struct vkd3d_shader_scan_signature_info *info); -+ - #ifdef __cplusplus - } - #endif /* __cplusplus */ -diff --git a/libs/vkd3d/include/vkd3d_windows.h b/libs/vkd3d/include/vkd3d_windows.h -index 002ff667cbc..7b0e972d828 100644 ---- a/libs/vkd3d/include/vkd3d_windows.h -+++ b/libs/vkd3d/include/vkd3d_windows.h -@@ -64,6 +64,7 @@ typedef int HRESULT; - - # define DXGI_ERROR_NOT_FOUND _HRESULT_TYPEDEF_(0x887a0002) - # define DXGI_ERROR_MORE_DATA _HRESULT_TYPEDEF_(0x887a0003) -+# define DXGI_ERROR_UNSUPPORTED _HRESULT_TYPEDEF_(0x887a0004) - - # define D3DERR_INVALIDCALL _HRESULT_TYPEDEF_(0x8876086c) - -@@ -124,6 +125,8 @@ typedef struct _GUID - # endif - - typedef GUID IID; -+typedef GUID CLSID; -+typedef GUID UUID; - - # ifdef INITGUID - # ifndef __cplusplus -@@ -222,9 +225,11 @@ typedef struct SECURITY_ATTRIBUTES SECURITY_ATTRIBUTES; - # if defined(__cplusplus) && !defined(CINTERFACE) - # define REFIID const IID & - # 
define REFGUID const GUID & -+# define REFCLSID const CLSID & - # else - # define REFIID const IID * const - # define REFGUID const GUID * const -+# define REFCLSID const CLSID * const - # endif - - #if defined(__cplusplus) && !defined(CINTERFACE) -diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c -index b363efbd360..aa7df5bd764 100644 ---- a/libs/vkd3d/libs/vkd3d-common/debug.c -+++ b/libs/vkd3d/libs/vkd3d-common/debug.c -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include - #ifdef HAVE_PTHREAD_H - #include - #endif -@@ -44,11 +45,11 @@ extern const char *const vkd3d_dbg_env_name; - - static const char *const debug_level_names[] = - { -- /* VKD3D_DBG_LEVEL_NONE */ "none", -- /* VKD3D_DBG_LEVEL_ERR */ "err", -- /* VKD3D_DBG_LEVEL_FIXME */ "fixme", -- /* VKD3D_DBG_LEVEL_WARN */ "warn", -- /* VKD3D_DBG_LEVEL_TRACE */ "trace", -+ [VKD3D_DBG_LEVEL_NONE ] = "none", -+ [VKD3D_DBG_LEVEL_ERR ] = "err", -+ [VKD3D_DBG_LEVEL_FIXME] = "fixme", -+ [VKD3D_DBG_LEVEL_WARN ] = "warn", -+ [VKD3D_DBG_LEVEL_TRACE] = "trace", - }; - - enum vkd3d_dbg_level vkd3d_dbg_get_level(void) -@@ -105,7 +106,13 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch - - assert(level < ARRAY_SIZE(debug_level_names)); - -+#ifdef _WIN32 -+ vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); -+#elif HAVE_GETTID -+ vkd3d_dbg_output("vkd3d:%u:%s:%s ", gettid(), debug_level_names[level], function); -+#else - vkd3d_dbg_output("vkd3d:%s:%s ", debug_level_names[level], function); -+#endif - va_start(args, fmt); - vkd3d_dbg_voutput(fmt, args); - va_end(args); -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 0a821b5c878..f0c386f1b3a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -578,17 +578,17 @@ static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, e - { - 
static const char *const resource_type_names[] = - { -- /* VKD3D_SHADER_RESOURCE_NONE */ "none", -- /* VKD3D_SHADER_RESOURCE_BUFFER */ "buffer", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ "texture1d", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ "texture2d", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ "texture2dms", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ "texture3d", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ "texturecube", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ "texture1darray", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ "texture2darray", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ "texture2dmsarray", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ "texturecubearray", -+ [VKD3D_SHADER_RESOURCE_NONE ] = "none", -+ [VKD3D_SHADER_RESOURCE_BUFFER ] = "buffer", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_1D ] = "texture1d", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_2D ] = "texture2d", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS ] = "texture2dms", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_3D ] = "texture3d", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE ] = "texturecube", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY ] = "texture1darray", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY ] = "texture2darray", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = "texture2dmsarray", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = "texturecubearray", - }; - - if (type < ARRAY_SIZE(resource_type_names)) -@@ -601,19 +601,19 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, const - { - static const char *const data_type_names[] = - { -- /* VKD3D_DATA_FLOAT */ "float", -- /* VKD3D_DATA_INT */ "int", -- /* VKD3D_DATA_RESOURCE */ "resource", -- /* VKD3D_DATA_SAMPLER */ "sampler", -- /* VKD3D_DATA_UAV */ "uav", -- /* VKD3D_DATA_UINT */ "uint", -- /* VKD3D_DATA_UNORM */ "unorm", -- /* VKD3D_DATA_SNORM */ "snorm", -- /* VKD3D_DATA_OPAQUE */ "opaque", -- /* VKD3D_DATA_MIXED */ "mixed", -- /* VKD3D_DATA_DOUBLE */ "double", -- /* VKD3D_DATA_CONTINUED */ "", -- /* VKD3D_DATA_UNUSED */ "", -+ 
[VKD3D_DATA_FLOAT ] = "float", -+ [VKD3D_DATA_INT ] = "int", -+ [VKD3D_DATA_RESOURCE ] = "resource", -+ [VKD3D_DATA_SAMPLER ] = "sampler", -+ [VKD3D_DATA_UAV ] = "uav", -+ [VKD3D_DATA_UINT ] = "uint", -+ [VKD3D_DATA_UNORM ] = "unorm", -+ [VKD3D_DATA_SNORM ] = "snorm", -+ [VKD3D_DATA_OPAQUE ] = "opaque", -+ [VKD3D_DATA_MIXED ] = "mixed", -+ [VKD3D_DATA_DOUBLE ] = "double", -+ [VKD3D_DATA_CONTINUED] = "", -+ [VKD3D_DATA_UNUSED ] = "", - }; - const char *name; - int i; -@@ -714,7 +714,7 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, - break; - - case VKD3D_DECL_USAGE_TEXCOORD: -- shader_addline(buffer, "texture%u", semantic->usage_idx); -+ shader_addline(buffer, "texcoord%u", semantic->usage_idx); - break; - - case VKD3D_DECL_USAGE_TANGENT: -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 712613ac13b..d5104ae9b79 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -214,6 +214,9 @@ struct vkd3d_shader_sm1_parser - bool abort; - - struct vkd3d_shader_parser p; -+ -+#define MAX_CONSTANT_COUNT 8192 -+ uint32_t constant_def_mask[3][MAX_CONSTANT_COUNT / 32]; - }; - - /* This table is not order or position dependent. 
*/ -@@ -260,9 +263,9 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = - /* Declarations */ - {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, - /* Constant definitions */ -- {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, -+ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, - {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, -- {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, -+ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, - /* Control flow */ - {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, -@@ -327,9 +330,9 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = - /* Declarations */ - {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, - /* Constant definitions */ -- {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, -+ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, - {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, -- {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, -+ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, - /* Control flow */ - {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, -@@ -490,6 +493,309 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader - dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; - } - -+static struct signature_element *find_signature_element(const struct shader_signature *signature, -+ const char *semantic_name, unsigned int semantic_index) -+{ -+ struct signature_element *e = signature->elements; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ if (!ascii_strcasecmp(e[i].semantic_name, semantic_name) -+ && e[i].semantic_index == semantic_index) -+ return &e[i]; -+ } -+ -+ return NULL; -+} -+ -+static struct signature_element *find_signature_element_by_register_index( -+ const struct shader_signature *signature, unsigned int register_index) -+{ -+ struct signature_element *e = signature->elements; -+ unsigned int i; -+ -+ for (i = 0; i < 
signature->element_count; ++i) -+ { -+ if (e[i].register_index == register_index) -+ return &e[i]; -+ } -+ -+ return NULL; -+} -+ -+#define SM1_COLOR_REGISTER_OFFSET 8 -+ -+static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, -+ const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, -+ unsigned int register_index, bool is_dcl, unsigned int mask) -+{ -+ struct shader_signature *signature; -+ struct signature_element *element; -+ -+ if (output) -+ signature = &sm1->p.shader_desc.output_signature; -+ else -+ signature = &sm1->p.shader_desc.input_signature; -+ -+ if ((element = find_signature_element(signature, name, index))) -+ { -+ element->mask |= mask; -+ if (!is_dcl) -+ element->used_mask |= mask; -+ return true; -+ } -+ -+ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, -+ signature->element_count + 1, sizeof(*signature->elements))) -+ return false; -+ element = &signature->elements[signature->element_count++]; -+ -+ element->semantic_name = name; -+ element->semantic_index = index; -+ element->stream_index = 0; -+ element->sysval_semantic = sysval; -+ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ element->register_index = register_index; -+ element->target_location = register_index; -+ element->register_count = 1; -+ element->mask = mask; -+ element->used_mask = is_dcl ? 
0 : mask; -+ element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; -+ -+ return true; -+} -+ -+static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, -+ unsigned int register_index, unsigned int mask) -+{ -+ struct shader_signature *signature; -+ struct signature_element *element; -+ -+ if (output) -+ signature = &sm1->p.shader_desc.output_signature; -+ else -+ signature = &sm1->p.shader_desc.input_signature; -+ -+ if (!(element = find_signature_element_by_register_index(signature, register_index))) -+ { -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC, -+ "%s register %u was used without being declared.", output ? "Output" : "Input", register_index); -+ return; -+ } -+ -+ element->used_mask |= mask; -+} -+ -+static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, -+ const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) -+{ -+ unsigned int register_index = reg->idx[0].offset; -+ -+ switch (reg->type) -+ { -+ case VKD3DSPR_TEMP: -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL -+ && sm1->p.shader_version.major == 1 && !register_index) -+ return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_NONE, 0, is_dcl, mask); -+ return true; -+ -+ case VKD3DSPR_INPUT: -+ /* For vertex shaders or sm3 pixel shaders, we should have already -+ * had a DCL instruction. Otherwise, this is a colour input. */ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX || sm1->p.shader_version.major == 3) -+ { -+ add_signature_mask(sm1, false, register_index, mask); -+ return true; -+ } -+ return add_signature_element(sm1, false, "COLOR", register_index, -+ VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); -+ -+ case VKD3DSPR_TEXTURE: -+ /* For vertex shaders, this is ADDR. 
*/ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) -+ return true; -+ return add_signature_element(sm1, false, "TEXCOORD", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ -+ case VKD3DSPR_OUTPUT: -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) -+ { -+ /* For sm < 2 vertex shaders, this is TEXCRDOUT. -+ * -+ * For sm3 vertex shaders, this is OUTPUT, but we already -+ * should have had a DCL instruction. */ -+ if (sm1->p.shader_version.major == 3) -+ { -+ add_signature_mask(sm1, true, register_index, mask); -+ return true; -+ } -+ return add_signature_element(sm1, true, "TEXCOORD", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ } -+ /* fall through */ -+ -+ case VKD3DSPR_ATTROUT: -+ return add_signature_element(sm1, true, "COLOR", register_index, -+ VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); -+ -+ case VKD3DSPR_COLOROUT: -+ return add_signature_element(sm1, true, "COLOR", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ -+ case VKD3DSPR_DEPTHOUT: -+ return add_signature_element(sm1, true, "DEPTH", 0, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); -+ -+ case VKD3DSPR_RASTOUT: -+ switch (register_index) -+ { -+ case 0: -+ return add_signature_element(sm1, true, "POSITION", 0, -+ VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); -+ -+ case 1: -+ return add_signature_element(sm1, true, "FOG", 0, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); -+ -+ case 2: -+ return add_signature_element(sm1, true, "PSIZE", 0, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); -+ -+ default: -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, -+ "Invalid rasterizer output index %u.", register_index); -+ return true; -+ } -+ -+ case VKD3DSPR_MISCTYPE: -+ switch (register_index) -+ { -+ case 0: -+ return add_signature_element(sm1, false, "VPOS", 0, -+ VKD3D_SHADER_SV_POSITION, 
register_index, is_dcl, mask); -+ -+ case 1: -+ return add_signature_element(sm1, false, "VFACE", 0, -+ VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1); -+ -+ default: -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, -+ "Invalid miscellaneous fragment input index %u.", register_index); -+ return true; -+ } -+ -+ default: -+ return true; -+ } -+} -+ -+static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, -+ const struct vkd3d_shader_semantic *semantic) -+{ -+ const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; -+ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; -+ unsigned int mask = semantic->resource.reg.write_mask; -+ bool output; -+ -+ static const char sm1_semantic_names[][13] = -+ { -+ [VKD3D_DECL_USAGE_POSITION ] = "POSITION", -+ [VKD3D_DECL_USAGE_BLEND_WEIGHT ] = "BLENDWEIGHT", -+ [VKD3D_DECL_USAGE_BLEND_INDICES] = "BLENDINDICES", -+ [VKD3D_DECL_USAGE_NORMAL ] = "NORMAL", -+ [VKD3D_DECL_USAGE_PSIZE ] = "PSIZE", -+ [VKD3D_DECL_USAGE_TEXCOORD ] = "TEXCOORD", -+ [VKD3D_DECL_USAGE_TANGENT ] = "TANGENT", -+ [VKD3D_DECL_USAGE_BINORMAL ] = "BINORMAL", -+ [VKD3D_DECL_USAGE_TESS_FACTOR ] = "TESSFACTOR", -+ [VKD3D_DECL_USAGE_POSITIONT ] = "POSITIONT", -+ [VKD3D_DECL_USAGE_COLOR ] = "COLOR", -+ [VKD3D_DECL_USAGE_FOG ] = "FOG", -+ [VKD3D_DECL_USAGE_DEPTH ] = "DEPTH", -+ [VKD3D_DECL_USAGE_SAMPLE ] = "SAMPLE", -+ }; -+ -+ if (reg->type == VKD3DSPR_OUTPUT) -+ output = true; -+ else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE) -+ output = false; -+ else /* vpos and vface don't have a semantic. */ -+ return add_signature_element_from_register(sm1, reg, true, mask); -+ -+ /* sm2 pixel shaders use DCL but don't provide a semantic. 
*/ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && sm1->p.shader_version.major == 2) -+ return add_signature_element_from_register(sm1, reg, true, mask); -+ -+ /* With the exception of vertex POSITION output, none of these are system -+ * values. Pixel POSITION input is not equivalent to SV_Position; the closer -+ * equivalent is VPOS, which is not declared as a semantic. */ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX -+ && output && semantic->usage == VKD3D_DECL_USAGE_POSITION) -+ sysval = VKD3D_SHADER_SV_POSITION; -+ -+ return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage], -+ semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); -+} -+ -+static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, -+ enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def) -+{ -+ struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; -+ -+ desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1); -+ if (from_def) -+ { -+ /* d3d shaders have a maximum of 8192 constants; we should not overrun -+ * this array. 
*/ -+ assert((index / 32) <= ARRAY_SIZE(sm1->constant_def_mask[set])); -+ bitmap_set(sm1->constant_def_mask[set], index); -+ } -+} -+ -+static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, -+ const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def) -+{ -+ struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; -+ uint32_t register_index = reg->idx[0].offset; -+ -+ switch (reg->type) -+ { -+ case VKD3DSPR_TEMP: -+ desc->temp_count = max(desc->temp_count, register_index + 1); -+ break; -+ -+ case VKD3DSPR_CONST: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONST2: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048 + register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONST3: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096 + register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONST4: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144 + register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONSTINT: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONSTBOOL: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, register_index, from_def); -+ break; -+ -+ default: -+ break; -+ } -+ -+ add_signature_element_from_register(sm1, reg, false, mask); -+} -+ - /* Read a parameter token from the input stream, and possibly a relative - * addressing token. 
*/ - static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, -@@ -640,6 +946,8 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, - range = &semantic->resource.range; - range->space = 0; - range->first = range->last = semantic->resource.reg.reg.idx[0].offset; -+ -+ add_signature_element_from_semantic(sm1, semantic); - } - - static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, -@@ -744,6 +1052,14 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, - } - } - -+static unsigned int mask_from_swizzle(unsigned int swizzle) -+{ -+ return (1u << vkd3d_swizzle_get_component(swizzle, 0)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 1)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 2)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 3)); -+} -+ - static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) - { - struct vkd3d_shader_src_param *src_params, *predicate; -@@ -817,22 +1133,28 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_FLOAT); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } - else if (ins->handler_idx == VKD3DSIH_DEFB) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_SCALAR, VKD3D_DATA_UINT); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } - else if (ins->handler_idx == VKD3DSIH_DEFI) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_INT); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } - else - { - /* Destination token */ - if (ins->dst_count) -+ { - 
shader_sm1_read_dst_param(sm1, &p, dst_param); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, false); -+ } - - /* Predication token */ - if (ins->predicate) -@@ -840,7 +1162,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - - /* Other source tokens */ - for (i = 0; i < ins->src_count; ++i) -+ { - shader_sm1_read_src_param(sm1, &p, &src_params[i]); -+ shader_sm1_scan_register(sm1, &src_params[i].reg, mask_from_swizzle(src_params[i].swizzle), false); -+ } - } - - if (sm1->abort) -@@ -947,12 +1272,30 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, - return VKD3D_OK; - } - -+static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1, -+ enum vkd3d_shader_d3dbc_constant_register set) -+{ -+ unsigned int j; -+ -+ /* Find the highest constant index which is not written by a DEF -+ * instruction. We can't (easily) use an FFZ function for this since it -+ * needs to be limited by the highest used register index. */ -+ for (j = sm1->p.shader_desc.flat_constant_count[set].used; j > 0; --j) -+ { -+ if (!bitmap_is_set(sm1->constant_def_mask[set], j - 1)) -+ return j; -+ } -+ -+ return 0; -+} -+ - int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) - { - struct vkd3d_shader_instruction_array *instructions; - struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_sm1_parser *sm1; -+ unsigned int i; - int ret; - - if (!(sm1 = vkd3d_calloc(1, sizeof(*sm1)))) -@@ -992,6 +1335,9 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi - - *parser = &sm1->p; - -+ for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) -+ sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); -+ - return sm1->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; - } - -@@ -1292,17 +1638,12 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - - if (var->is_param && var->is_uniform) - { -- struct vkd3d_string_buffer *name; -+ char *new_name; - -- if (!(name = hlsl_get_string_buffer(ctx))) -- { -- buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; -+ if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name))) - return; -- } -- vkd3d_string_buffer_printf(name, "$%s", var->name); - vkd3d_free((char *)var->name); -- var->name = hlsl_strdup(ctx, name->buffer); -- hlsl_release_string_buffer(ctx, name); -+ var->name = new_name; - } - } - } -@@ -1340,7 +1681,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - else - { - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); -- put_u32(buffer, var->regs[r].bind_count); -+ put_u32(buffer, var->bind_count[r]); - } - put_u32(buffer, 0); /* type */ - put_u32(buffer, 0); /* FIXME: default value */ -@@ -1553,12 +1894,13 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - - for (i = 0; i < ctx->constant_defs.count; ++i) - { -+ const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; - uint32_t token = D3DSIO_DEF; - const struct sm1_dst_register reg = - { - .type = D3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, -- .reg = i, -+ .reg = constant_reg->index, - }; - - if (ctx->profile->major_version > 1) -@@ -1567,7 +1909,7 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - - write_sm1_dst_register(buffer, ®); - for (x = 0; x < 4; ++x) -- put_f32(buffer, ctx->constant_defs.values[i].f[x]); -+ put_f32(buffer, constant_reg->value.f[x]); - } - } - -@@ -1640,10 +1982,6 @@ static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu - - switch (sampler_dim) - { -- case HLSL_SAMPLER_DIM_1D: -- res_type = VKD3D_SM1_RESOURCE_TEXTURE_1D; -- break; -- - case HLSL_SAMPLER_DIM_2D: 
- res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D; - break; -@@ -1686,14 +2024,19 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) - continue; - -- count = var->regs[HLSL_REGSET_SAMPLERS].bind_count; -+ count = var->bind_count[HLSL_REGSET_SAMPLERS]; - - for (i = 0; i < count; ++i) - { - if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) - { - sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; -- assert(sampler_dim != HLSL_SAMPLER_DIM_GENERIC); -+ if (sampler_dim == HLSL_SAMPLER_DIM_GENERIC) -+ { -+ /* These can appear in sm4-style combined sample instructions. */ -+ hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); -+ continue; -+ } - - reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; - write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); -@@ -1844,6 +2187,35 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - } - -+static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+{ -+ const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -+ -+ switch (jump->type) -+ { -+ case HLSL_IR_JUMP_DISCARD_NEG: -+ { -+ struct hlsl_reg *reg = &jump->condition.node->reg; -+ -+ struct sm1_instruction instr = -+ { -+ .opcode = VKD3D_SM1_OP_TEXKILL, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.reg = reg->id, -+ .dst.writemask = reg->writemask, -+ .has_dst = 1, -+ }; -+ -+ write_sm1_instruction(ctx, buffer, &instr); -+ break; -+ } -+ -+ default: -+ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); -+ } -+} -+ - static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_load *load = hlsl_ir_load(instr); -@@ -2038,6 +2410,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - write_sm1_expr(ctx, buffer, instr); 
- break; - -+ case HLSL_IR_JUMP: -+ write_sm1_jump(ctx, buffer, instr); -+ break; -+ - case HLSL_IR_LOAD: - write_sm1_load(ctx, buffer, instr); - break; -@@ -2063,7 +2439,6 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) - { - struct vkd3d_bytecode_buffer buffer = {0}; -- int ret; - - put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - -@@ -2076,10 +2451,17 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun - - put_u32(&buffer, D3DSIO_END); - -- if (!(ret = buffer.status)) -+ if (buffer.status) -+ ctx->result = buffer.status; -+ -+ if (!ctx->result) - { - out->code = buffer.data; - out->size = buffer.size; - } -- return ret; -+ else -+ { -+ vkd3d_free(buffer.data); -+ } -+ return ctx->result; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index 3e3f06faeb5..cedc3da4a83 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -@@ -391,6 +391,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s - read_dword(&ptr, &e[i].sysval_semantic); - read_dword(&ptr, &e[i].component_type); - read_dword(&ptr, &e[i].register_index); -+ e[i].target_location = e[i].register_index; - e[i].register_count = 1; - read_dword(&ptr, &mask); - e[i].mask = mask & 0xff; -@@ -493,8 +494,14 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, - return ret; - break; - -+ case TAG_DXIL: - case TAG_SHDR: - case TAG_SHEX: -+ if ((section->tag == TAG_DXIL) != desc->is_dxil) -+ { -+ TRACE("Skipping chunk %#x.\n", section->tag); -+ break; -+ } - if (desc->byte_code) - FIXME("Multiple shader code chunks.\n"); - desc->byte_code = section->data.code; -@@ -505,10 +512,6 @@ static int shdr_handler(const struct 
vkd3d_shader_dxbc_section_desc *section, - TRACE("Skipping AON9 shader code chunk.\n"); - break; - -- case TAG_DXIL: -- FIXME("Skipping DXIL shader model 6+ code chunk.\n"); -- break; -- - default: - TRACE("Skipping chunk %#x.\n", section->tag); - break; -@@ -529,12 +532,6 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, - { - int ret; - -- desc->byte_code = NULL; -- desc->byte_code_size = 0; -- memset(&desc->input_signature, 0, sizeof(desc->input_signature)); -- memset(&desc->output_signature, 0, sizeof(desc->output_signature)); -- memset(&desc->patch_constant_signature, 0, sizeof(desc->patch_constant_signature)); -- - ret = for_each_dxbc_section(dxbc, message_context, source_name, shdr_handler, desc); - if (!desc->byte_code) - ret = VKD3D_ERROR_INVALID_ARGUMENT; -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -new file mode 100644 -index 00000000000..b78c78d34a7 ---- /dev/null -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -0,0 +1,2968 @@ -+/* -+ * Copyright 2023 Conor McCarthy for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#include "vkd3d_shader_private.h" -+ -+#define VKD3D_SM6_VERSION_MAJOR(version) (((version) >> 4) & 0xf) -+#define VKD3D_SM6_VERSION_MINOR(version) (((version) >> 0) & 0xf) -+ -+#define BITCODE_MAGIC VKD3D_MAKE_TAG('B', 'C', 0xc0, 0xde) -+#define DXIL_OP_MAX_OPERANDS 17 -+ -+enum bitcode_block_id -+{ -+ BLOCKINFO_BLOCK = 0, -+ MODULE_BLOCK = 8, -+ PARAMATTR_BLOCK = 9, -+ PARAMATTR_GROUP_BLOCK = 10, -+ CONSTANTS_BLOCK = 11, -+ FUNCTION_BLOCK = 12, -+ VALUE_SYMTAB_BLOCK = 14, -+ METADATA_BLOCK = 15, -+ METADATA_ATTACHMENT_BLOCK = 16, -+ TYPE_BLOCK = 17, -+ USELIST_BLOCK = 18, -+}; -+ -+enum bitcode_blockinfo_code -+{ -+ SETBID = 1, -+ BLOCKNAME = 2, -+ SETRECORDNAME = 3, -+}; -+ -+enum bitcode_block_abbreviation -+{ -+ END_BLOCK = 0, -+ ENTER_SUBBLOCK = 1, -+ DEFINE_ABBREV = 2, -+ UNABBREV_RECORD = 3, -+}; -+ -+enum bitcode_abbrev_type -+{ -+ ABBREV_FIXED = 1, -+ ABBREV_VBR = 2, -+ ABBREV_ARRAY = 3, -+ ABBREV_CHAR = 4, -+ ABBREV_BLOB = 5, -+}; -+ -+enum bitcode_address_space -+{ -+ ADDRESS_SPACE_DEFAULT, -+ ADDRESS_SPACE_DEVICEMEM, -+ ADDRESS_SPACE_CBUFFER, -+ ADDRESS_SPACE_GROUPSHARED, -+}; -+ -+enum bitcode_module_code -+{ -+ MODULE_CODE_VERSION = 1, -+ MODULE_CODE_GLOBALVAR = 7, -+ MODULE_CODE_FUNCTION = 8, -+}; -+ -+enum bitcode_constant_code -+{ -+ CST_CODE_SETTYPE = 1, -+ CST_CODE_NULL = 2, -+ CST_CODE_UNDEF = 3, -+ CST_CODE_INTEGER = 4, -+ CST_CODE_FLOAT = 6, -+ CST_CODE_STRING = 8, -+ CST_CODE_CE_GEP = 12, -+ CST_CODE_CE_INBOUNDS_GEP = 20, -+ CST_CODE_DATA = 22, -+}; -+ -+enum bitcode_function_code -+{ -+ FUNC_CODE_DECLAREBLOCKS = 1, -+ FUNC_CODE_INST_BINOP = 2, -+ FUNC_CODE_INST_CAST = 3, -+ FUNC_CODE_INST_RET = 10, -+ FUNC_CODE_INST_BR = 11, -+ FUNC_CODE_INST_SWITCH = 12, -+ FUNC_CODE_INST_PHI = 16, -+ FUNC_CODE_INST_ALLOCA 
= 19, -+ FUNC_CODE_INST_LOAD = 20, -+ FUNC_CODE_INST_EXTRACTVAL = 26, -+ FUNC_CODE_INST_CMP2 = 28, -+ FUNC_CODE_INST_VSELECT = 29, -+ FUNC_CODE_INST_CALL = 34, -+ FUNC_CODE_INST_ATOMICRMW = 38, -+ FUNC_CODE_INST_LOADATOMIC = 41, -+ FUNC_CODE_INST_GEP = 43, -+ FUNC_CODE_INST_STORE = 44, -+ FUNC_CODE_INST_STOREATOMIC = 45, -+ FUNC_CODE_INST_CMPXCHG = 46, -+}; -+ -+enum bitcode_type_code -+{ -+ TYPE_CODE_NUMENTRY = 1, -+ TYPE_CODE_VOID = 2, -+ TYPE_CODE_FLOAT = 3, -+ TYPE_CODE_DOUBLE = 4, -+ TYPE_CODE_LABEL = 5, -+ TYPE_CODE_INTEGER = 7, -+ TYPE_CODE_POINTER = 8, -+ TYPE_CODE_HALF = 10, -+ TYPE_CODE_ARRAY = 11, -+ TYPE_CODE_VECTOR = 12, -+ TYPE_CODE_METADATA = 16, -+ TYPE_CODE_STRUCT_ANON = 18, -+ TYPE_CODE_STRUCT_NAME = 19, -+ TYPE_CODE_STRUCT_NAMED = 20, -+ TYPE_CODE_FUNCTION = 21, -+}; -+ -+enum bitcode_value_symtab_code -+{ -+ VST_CODE_ENTRY = 1, -+ VST_CODE_BBENTRY = 2, -+}; -+ -+enum dx_intrinsic_opcode -+{ -+ DX_STORE_OUTPUT = 5, -+}; -+ -+struct sm6_pointer_info -+{ -+ const struct sm6_type *type; -+ enum bitcode_address_space addr_space; -+}; -+ -+struct sm6_struct_info -+{ -+ const char *name; -+ unsigned int elem_count; -+ const struct sm6_type *elem_types[]; -+}; -+ -+struct sm6_function_info -+{ -+ const struct sm6_type *ret_type; -+ unsigned int param_count; -+ const struct sm6_type *param_types[]; -+}; -+ -+struct sm6_array_info -+{ -+ unsigned int count; -+ const struct sm6_type *elem_type; -+}; -+ -+enum sm6_type_class -+{ -+ TYPE_CLASS_VOID, -+ TYPE_CLASS_INTEGER, -+ TYPE_CLASS_FLOAT, -+ TYPE_CLASS_POINTER, -+ TYPE_CLASS_STRUCT, -+ TYPE_CLASS_FUNCTION, -+ TYPE_CLASS_VECTOR, -+ TYPE_CLASS_ARRAY, -+ TYPE_CLASS_LABEL, -+ TYPE_CLASS_METADATA, -+}; -+ -+struct sm6_type -+{ -+ enum sm6_type_class class; -+ union -+ { -+ unsigned int width; -+ struct sm6_pointer_info pointer; -+ struct sm6_struct_info *struc; -+ struct sm6_function_info *function; -+ struct sm6_array_info array; -+ } u; -+}; -+ -+enum sm6_value_type -+{ -+ VALUE_TYPE_FUNCTION, -+ 
VALUE_TYPE_REG, -+}; -+ -+struct sm6_function_data -+{ -+ const char *name; -+ bool is_prototype; -+ unsigned int attribs_id; -+}; -+ -+struct sm6_value -+{ -+ const struct sm6_type *type; -+ enum sm6_value_type value_type; -+ bool is_undefined; -+ union -+ { -+ struct sm6_function_data function; -+ struct vkd3d_shader_register reg; -+ } u; -+}; -+ -+struct dxil_record -+{ -+ unsigned int code; -+ unsigned int operand_count; -+ uint64_t operands[]; -+}; -+ -+struct sm6_symbol -+{ -+ unsigned int id; -+ const char *name; -+}; -+ -+struct sm6_block -+{ -+ struct vkd3d_shader_instruction *instructions; -+ size_t instruction_capacity; -+ size_t instruction_count; -+}; -+ -+struct sm6_function -+{ -+ const struct sm6_value *declaration; -+ -+ struct sm6_block *blocks[1]; -+ size_t block_count; -+ -+ size_t value_count; -+}; -+ -+struct dxil_block -+{ -+ const struct dxil_block *parent; -+ enum bitcode_block_id id; -+ unsigned int abbrev_len; -+ unsigned int start; -+ unsigned int length; -+ unsigned int level; -+ -+ /* The abbrev, block and record structs are not relocatable. 
*/ -+ struct dxil_abbrev **abbrevs; -+ size_t abbrev_capacity; -+ size_t abbrev_count; -+ unsigned int blockinfo_bid; -+ bool has_bid; -+ -+ struct dxil_block **child_blocks; -+ size_t child_block_capacity; -+ size_t child_block_count; -+ -+ struct dxil_record **records; -+ size_t record_capacity; -+ size_t record_count; -+}; -+ -+struct sm6_parser -+{ -+ const uint32_t *ptr, *start, *end; -+ unsigned int bitpos; -+ -+ struct dxil_block root_block; -+ struct dxil_block *current_block; -+ -+ struct dxil_global_abbrev **abbrevs; -+ size_t abbrev_capacity; -+ size_t abbrev_count; -+ -+ struct sm6_type *types; -+ size_t type_count; -+ -+ struct sm6_symbol *global_symbols; -+ size_t global_symbol_count; -+ -+ struct vkd3d_shader_dst_param *output_params; -+ -+ struct sm6_function *functions; -+ size_t function_count; -+ -+ struct sm6_value *values; -+ size_t value_count; -+ size_t value_capacity; -+ size_t cur_max_value; -+ -+ struct vkd3d_shader_parser p; -+}; -+ -+struct dxil_abbrev_operand -+{ -+ uint64_t context; -+ bool (*read_operand)(struct sm6_parser *sm6, uint64_t context, uint64_t *operand); -+}; -+ -+struct dxil_abbrev -+{ -+ unsigned int count; -+ bool is_array; -+ struct dxil_abbrev_operand operands[]; -+}; -+ -+struct dxil_global_abbrev -+{ -+ unsigned int block_id; -+ struct dxil_abbrev abbrev; -+}; -+ -+static const uint64_t CALL_CONV_FLAG_EXPLICIT_TYPE = 1ull << 15; -+ -+static size_t size_add_with_overflow_check(size_t a, size_t b) -+{ -+ size_t i = a + b; -+ return (i < a) ? 
SIZE_MAX : i; -+} -+ -+static struct sm6_parser *sm6_parser(struct vkd3d_shader_parser *parser) -+{ -+ return CONTAINING_RECORD(parser, struct sm6_parser, p); -+} -+ -+static bool sm6_parser_is_end(struct sm6_parser *sm6) -+{ -+ return sm6->ptr == sm6->end; -+} -+ -+static uint32_t sm6_parser_read_uint32(struct sm6_parser *sm6) -+{ -+ if (sm6_parser_is_end(sm6)) -+ { -+ sm6->p.failed = true; -+ return 0; -+ } -+ return *sm6->ptr++; -+} -+ -+static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length) -+{ -+ unsigned int l, prev_len = 0; -+ uint32_t bits; -+ -+ if (!length) -+ return 0; -+ -+ assert(length < 32); -+ -+ if (sm6_parser_is_end(sm6)) -+ { -+ sm6->p.failed = true; -+ return 0; -+ } -+ -+ assert(sm6->bitpos < 32); -+ bits = *sm6->ptr >> sm6->bitpos; -+ l = 32 - sm6->bitpos; -+ if (l <= length) -+ { -+ ++sm6->ptr; -+ if (sm6_parser_is_end(sm6) && l < length) -+ { -+ sm6->p.failed = true; -+ return bits; -+ } -+ sm6->bitpos = 0; -+ bits |= *sm6->ptr << l; -+ prev_len = l; -+ } -+ sm6->bitpos += length - prev_len; -+ -+ return bits & ((1 << length) - 1); -+} -+ -+static uint64_t sm6_parser_read_vbr(struct sm6_parser *sm6, unsigned int length) -+{ -+ unsigned int bits, flag, mask, shift = 0; -+ uint64_t result = 0; -+ -+ if (!length) -+ return 0; -+ -+ if (sm6_parser_is_end(sm6)) -+ { -+ sm6->p.failed = true; -+ return 0; -+ } -+ -+ flag = 1 << (length - 1); -+ mask = flag - 1; -+ do -+ { -+ bits = sm6_parser_read_bits(sm6, length); -+ result |= (uint64_t)(bits & mask) << shift; -+ shift += length - 1; -+ } while ((bits & flag) && !sm6->p.failed && shift < 64); -+ -+ sm6->p.failed |= !!(bits & flag); -+ -+ return result; -+} -+ -+static void sm6_parser_align_32(struct sm6_parser *sm6) -+{ -+ if (!sm6->bitpos) -+ return; -+ -+ if (sm6_parser_is_end(sm6)) -+ { -+ sm6->p.failed = true; -+ return; -+ } -+ -+ ++sm6->ptr; -+ sm6->bitpos = 0; -+} -+ -+static bool dxil_block_handle_blockinfo_record(struct dxil_block *block, struct dxil_record 
*record) -+{ -+ /* BLOCKINFO blocks must only occur immediately below the module root block. */ -+ if (block->level > 1) -+ { -+ WARN("Invalid blockinfo block level %u.\n", block->level); -+ return false; -+ } -+ -+ switch (record->code) -+ { -+ case SETBID: -+ if (!record->operand_count) -+ { -+ WARN("Missing id operand.\n"); -+ return false; -+ } -+ if (record->operands[0] > UINT_MAX) -+ WARN("Truncating block id %"PRIu64".\n", record->operands[0]); -+ block->blockinfo_bid = record->operands[0]; -+ block->has_bid = true; -+ break; -+ case BLOCKNAME: -+ case SETRECORDNAME: -+ break; -+ default: -+ FIXME("Unhandled BLOCKINFO record type %u.\n", record->code); -+ break; -+ } -+ -+ return true; -+} -+ -+static enum vkd3d_result dxil_block_add_record(struct dxil_block *block, struct dxil_record *record) -+{ -+ unsigned int reserve; -+ -+ switch (block->id) -+ { -+ /* Rough initial reserve sizes for small shaders. */ -+ case CONSTANTS_BLOCK: reserve = 32; break; -+ case FUNCTION_BLOCK: reserve = 128; break; -+ case METADATA_BLOCK: reserve = 32; break; -+ case TYPE_BLOCK: reserve = 32; break; -+ default: reserve = 8; break; -+ } -+ reserve = max(reserve, block->record_count + 1); -+ if (!vkd3d_array_reserve((void **)&block->records, &block->record_capacity, reserve, sizeof(*block->records))) -+ { -+ ERR("Failed to allocate %u records.\n", reserve); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if (block->id == BLOCKINFO_BLOCK && !dxil_block_handle_blockinfo_record(block, record)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ block->records[block->record_count++] = record; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_read_unabbrev_record(struct sm6_parser *sm6) -+{ -+ struct dxil_block *block = sm6->current_block; -+ enum vkd3d_result ret = VKD3D_OK; -+ unsigned int code, count, i; -+ struct dxil_record *record; -+ -+ code = sm6_parser_read_vbr(sm6, 6); -+ -+ count = sm6_parser_read_vbr(sm6, 6); -+ if (!(record = vkd3d_malloc(sizeof(*record) + 
count * sizeof(record->operands[0])))) -+ { -+ ERR("Failed to allocate record with %u operands.\n", count); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ record->code = code; -+ record->operand_count = count; -+ -+ for (i = 0; i < count; ++i) -+ record->operands[i] = sm6_parser_read_vbr(sm6, 6); -+ if (sm6->p.failed) -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ -+ if (ret < 0 || (ret = dxil_block_add_record(block, record)) < 0) -+ vkd3d_free(record); -+ -+ return ret; -+} -+ -+static bool sm6_parser_read_literal_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ *op = context; -+ return !sm6->p.failed; -+} -+ -+static bool sm6_parser_read_fixed_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ *op = sm6_parser_read_bits(sm6, context); -+ return !sm6->p.failed; -+} -+ -+static bool sm6_parser_read_vbr_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ *op = sm6_parser_read_vbr(sm6, context); -+ return !sm6->p.failed; -+} -+ -+static bool sm6_parser_read_char6_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ *op = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"[sm6_parser_read_bits(sm6, 6)]; -+ return !sm6->p.failed; -+} -+ -+static bool sm6_parser_read_blob_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ int count = sm6_parser_read_vbr(sm6, 6); -+ sm6_parser_align_32(sm6); -+ for (; count > 0; count -= 4) -+ sm6_parser_read_uint32(sm6); -+ FIXME("Unhandled blob operand.\n"); -+ return false; -+} -+ -+static enum vkd3d_result dxil_abbrev_init(struct dxil_abbrev *abbrev, unsigned int count, struct sm6_parser *sm6) -+{ -+ enum bitcode_abbrev_type prev_type, type; -+ unsigned int i; -+ -+ abbrev->is_array = false; -+ -+ for (i = 0, prev_type = 0; i < count && !sm6->p.failed; ++i) -+ { -+ if (sm6_parser_read_bits(sm6, 1)) -+ { -+ if (prev_type == ABBREV_ARRAY) -+ { -+ WARN("Unexpected literal abbreviation after array.\n"); -+ return 
VKD3D_ERROR_INVALID_SHADER; -+ } -+ abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 8); -+ abbrev->operands[i].read_operand = sm6_parser_read_literal_operand; -+ continue; -+ } -+ -+ switch (type = sm6_parser_read_bits(sm6, 3)) -+ { -+ case ABBREV_FIXED: -+ case ABBREV_VBR: -+ abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 5); -+ abbrev->operands[i].read_operand = (type == ABBREV_FIXED) ? sm6_parser_read_fixed_operand -+ : sm6_parser_read_vbr_operand; -+ break; -+ -+ case ABBREV_ARRAY: -+ if (prev_type == ABBREV_ARRAY || i != count - 2) -+ { -+ WARN("Unexpected array abbreviation.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ abbrev->is_array = true; -+ --i; -+ --count; -+ break; -+ -+ case ABBREV_CHAR: -+ abbrev->operands[i].read_operand = sm6_parser_read_char6_operand; -+ break; -+ -+ case ABBREV_BLOB: -+ if (prev_type == ABBREV_ARRAY || i != count - 1) -+ { -+ WARN("Unexpected blob abbreviation.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ abbrev->operands[i].read_operand = sm6_parser_read_blob_operand; -+ break; -+ } -+ -+ prev_type = type; -+ } -+ -+ abbrev->count = count; -+ -+ return sm6->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6) -+{ -+ struct dxil_block *block = sm6->current_block; -+ unsigned int count = sm6_parser_read_vbr(sm6, 5); -+ struct dxil_global_abbrev *global_abbrev; -+ enum vkd3d_result ret; -+ -+ assert(block->id == BLOCKINFO_BLOCK); -+ -+ if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs)) -+ || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0])))) -+ { -+ ERR("Failed to allocate global abbreviation.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if ((ret = dxil_abbrev_init(&global_abbrev->abbrev, count, sm6)) < 0) -+ { -+ vkd3d_free(global_abbrev); -+ return ret; -+ } -+ -+ if (!block->has_bid) -+ { -+ WARN("Missing blockinfo block id.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (block->blockinfo_bid == MODULE_BLOCK) -+ { -+ FIXME("Unhandled global abbreviation for module block.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ global_abbrev->block_id = block->blockinfo_bid; -+ -+ sm6->abbrevs[sm6->abbrev_count++] = global_abbrev; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_add_block_abbrev(struct sm6_parser *sm6) -+{ -+ struct dxil_block *block = sm6->current_block; -+ struct dxil_abbrev *abbrev; -+ enum vkd3d_result ret; -+ unsigned int count; -+ -+ if (block->id == BLOCKINFO_BLOCK) -+ return sm6_parser_add_global_abbrev(sm6); -+ -+ count = sm6_parser_read_vbr(sm6, 5); -+ if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, block->abbrev_count + 1, sizeof(*block->abbrevs)) -+ || !(abbrev = vkd3d_malloc(sizeof(*abbrev) + count * sizeof(abbrev->operands[0])))) -+ { -+ ERR("Failed to allocate block abbreviation.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if ((ret = dxil_abbrev_init(abbrev, count, sm6)) < 0) -+ { -+ vkd3d_free(abbrev); -+ return ret; -+ } -+ -+ 
block->abbrevs[block->abbrev_count++] = abbrev; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_read_abbrev_record(struct sm6_parser *sm6, unsigned int abbrev_id) -+{ -+ enum vkd3d_result ret = VKD3D_ERROR_INVALID_SHADER; -+ struct dxil_block *block = sm6->current_block; -+ struct dxil_record *temp, *record; -+ unsigned int i, count, array_len; -+ struct dxil_abbrev *abbrev; -+ uint64_t code; -+ -+ if (abbrev_id >= block->abbrev_count) -+ { -+ WARN("Invalid abbreviation id %u.\n", abbrev_id); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ abbrev = block->abbrevs[abbrev_id]; -+ if (!(count = abbrev->count)) -+ return VKD3D_OK; -+ if (count == 1 && abbrev->is_array) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ /* First operand is the record code. The array is included in the count, but will be done separately. */ -+ count -= abbrev->is_array + 1; -+ if (!(record = vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) -+ { -+ ERR("Failed to allocate record with %u operands.\n", count); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if (!abbrev->operands[0].read_operand(sm6, abbrev->operands[0].context, &code)) -+ goto fail; -+ if (code > UINT_MAX) -+ FIXME("Truncating 64-bit record code %#"PRIx64".\n", code); -+ record->code = code; -+ -+ for (i = 0; i < count; ++i) -+ if (!abbrev->operands[i + 1].read_operand(sm6, abbrev->operands[i + 1].context, &record->operands[i])) -+ goto fail; -+ record->operand_count = count; -+ -+ /* An array can occur only as the last operand. 
*/ -+ if (abbrev->is_array) -+ { -+ array_len = sm6_parser_read_vbr(sm6, 6); -+ if (!(temp = vkd3d_realloc(record, sizeof(*record) + (count + array_len) * sizeof(record->operands[0])))) -+ { -+ ERR("Failed to allocate record with %u operands.\n", count + array_len); -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ goto fail; -+ } -+ record = temp; -+ -+ for (i = 0; i < array_len; ++i) -+ { -+ if (!abbrev->operands[count + 1].read_operand(sm6, abbrev->operands[count + 1].context, -+ &record->operands[count + i])) -+ { -+ goto fail; -+ } -+ } -+ record->operand_count += array_len; -+ } -+ -+ if ((ret = dxil_block_add_record(block, record)) < 0) -+ goto fail; -+ -+ return VKD3D_OK; -+ -+fail: -+ vkd3d_free(record); -+ return ret; -+} -+ -+static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, -+ struct sm6_parser *sm6); -+ -+static enum vkd3d_result dxil_block_read(struct dxil_block *parent, struct sm6_parser *sm6) -+{ -+ unsigned int reserve = (parent->id == MODULE_BLOCK) ? 
12 : 2; -+ struct dxil_block *block; -+ enum vkd3d_result ret; -+ -+ sm6->current_block = parent; -+ -+ do -+ { -+ unsigned int abbrev_id = sm6_parser_read_bits(sm6, parent->abbrev_len); -+ -+ switch (abbrev_id) -+ { -+ case END_BLOCK: -+ sm6_parser_align_32(sm6); -+ return VKD3D_OK; -+ -+ case ENTER_SUBBLOCK: -+ if (parent->id != MODULE_BLOCK && parent->id != FUNCTION_BLOCK) -+ { -+ WARN("Invalid subblock parent id %u.\n", parent->id); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!vkd3d_array_reserve((void **)&parent->child_blocks, &parent->child_block_capacity, -+ max(reserve, parent->child_block_count + 1), sizeof(*parent->child_blocks)) -+ || !(block = vkd3d_calloc(1, sizeof(*block)))) -+ { -+ ERR("Failed to allocate block.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if ((ret = dxil_block_init(block, parent, sm6)) < 0) -+ { -+ vkd3d_free(block); -+ return ret; -+ } -+ -+ parent->child_blocks[parent->child_block_count++] = block; -+ sm6->current_block = parent; -+ break; -+ -+ case DEFINE_ABBREV: -+ if ((ret = sm6_parser_add_block_abbrev(sm6)) < 0) -+ return ret; -+ break; -+ -+ case UNABBREV_RECORD: -+ if ((ret = sm6_parser_read_unabbrev_record(sm6)) < 0) -+ { -+ WARN("Failed to read unabbreviated record.\n"); -+ return ret; -+ } -+ break; -+ -+ default: -+ if ((ret = sm6_parser_read_abbrev_record(sm6, abbrev_id - 4)) < 0) -+ { -+ WARN("Failed to read abbreviated record.\n"); -+ return ret; -+ } -+ break; -+ } -+ } while (!sm6->p.failed); -+ -+ return VKD3D_ERROR_INVALID_SHADER; -+} -+ -+static size_t sm6_parser_compute_global_abbrev_count_for_block_id(struct sm6_parser *sm6, -+ unsigned int block_id) -+{ -+ size_t i, count; -+ -+ for (i = 0, count = 0; i < sm6->abbrev_count; ++i) -+ count += sm6->abbrevs[i]->block_id == block_id; -+ -+ return count; -+} -+ -+static void dxil_block_destroy(struct dxil_block *block) -+{ -+ size_t i; -+ -+ for (i = 0; i < block->record_count; ++i) -+ vkd3d_free(block->records[i]); -+ vkd3d_free(block->records); 
-+ -+ for (i = 0; i < block->child_block_count; ++i) -+ { -+ dxil_block_destroy(block->child_blocks[i]); -+ vkd3d_free(block->child_blocks[i]); -+ } -+ vkd3d_free(block->child_blocks); -+ -+ block->records = NULL; -+ block->record_count = 0; -+ block->child_blocks = NULL; -+ block->child_block_count = 0; -+} -+ -+static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, -+ struct sm6_parser *sm6) -+{ -+ size_t i, abbrev_count = 0; -+ enum vkd3d_result ret; -+ -+ block->parent = parent; -+ block->level = parent ? parent->level + 1 : 0; -+ block->id = sm6_parser_read_vbr(sm6, 8); -+ block->abbrev_len = sm6_parser_read_vbr(sm6, 4); -+ sm6_parser_align_32(sm6); -+ block->length = sm6_parser_read_uint32(sm6); -+ block->start = sm6->ptr - sm6->start; -+ -+ if (sm6->p.failed) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if ((block->abbrev_count = sm6_parser_compute_global_abbrev_count_for_block_id(sm6, block->id))) -+ { -+ if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, -+ block->abbrev_count, sizeof(*block->abbrevs))) -+ { -+ ERR("Failed to allocate block abbreviations.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ for (i = 0; i < sm6->abbrev_count; ++i) -+ if (sm6->abbrevs[i]->block_id == block->id) -+ block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; -+ -+ assert(abbrev_count == block->abbrev_count); -+ } -+ -+ if ((ret = dxil_block_read(block, sm6)) < 0) -+ dxil_block_destroy(block); -+ -+ for (i = abbrev_count; i < block->abbrev_count; ++i) -+ vkd3d_free(block->abbrevs[i]); -+ vkd3d_free(block->abbrevs); -+ block->abbrevs = NULL; -+ block->abbrev_count = 0; -+ -+ return ret; -+} -+ -+static size_t dxil_block_compute_function_count(const struct dxil_block *root) -+{ -+ size_t i, count; -+ -+ for (i = 0, count = 0; i < root->child_block_count; ++i) -+ count += root->child_blocks[i]->id == FUNCTION_BLOCK; -+ -+ return count; -+} -+ -+static size_t 
dxil_block_compute_module_decl_count(const struct dxil_block *block) -+{ -+ size_t i, count; -+ -+ for (i = 0, count = 0; i < block->record_count; ++i) -+ count += block->records[i]->code == MODULE_CODE_FUNCTION; -+ return count; -+} -+ -+static size_t dxil_block_compute_constants_count(const struct dxil_block *block) -+{ -+ size_t i, count; -+ -+ for (i = 0, count = 0; i < block->record_count; ++i) -+ count += block->records[i]->code != CST_CODE_SETTYPE; -+ return count; -+} -+ -+static void dxil_global_abbrevs_cleanup(struct dxil_global_abbrev **abbrevs, size_t count) -+{ -+ size_t i; -+ -+ for (i = 0; i < count; ++i) -+ vkd3d_free(abbrevs[i]); -+ vkd3d_free(abbrevs); -+} -+ -+static const struct dxil_block *sm6_parser_get_level_one_block(const struct sm6_parser *sm6, -+ enum bitcode_block_id id, bool *is_unique) -+{ -+ const struct dxil_block *block, *found = NULL; -+ size_t i; -+ -+ for (i = 0, *is_unique = true; i < sm6->root_block.child_block_count; ++i) -+ { -+ block = sm6->root_block.child_blocks[i]; -+ if (block->id != id) -+ continue; -+ -+ if (!found) -+ found = block; -+ else -+ *is_unique = false; -+ } -+ -+ return found; -+} -+ -+static char *dxil_record_to_string(const struct dxil_record *record, unsigned int offset) -+{ -+ unsigned int i; -+ char *str; -+ -+ assert(offset <= record->operand_count); -+ if (!(str = vkd3d_calloc(record->operand_count - offset + 1, 1))) -+ return NULL; -+ -+ for (i = offset; i < record->operand_count; ++i) -+ str[i - offset] = record->operands[i]; -+ -+ return str; -+} -+ -+static bool dxil_record_validate_operand_min_count(const struct dxil_record *record, unsigned int min_count, -+ struct sm6_parser *sm6) -+{ -+ if (record->operand_count >= min_count) -+ return true; -+ -+ WARN("Invalid operand count %u for code %u.\n", record->operand_count, record->code); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "Invalid operand count %u for record code %u.", record->operand_count, 
record->code); -+ return false; -+} -+ -+static void dxil_record_validate_operand_max_count(const struct dxil_record *record, unsigned int max_count, -+ struct sm6_parser *sm6) -+{ -+ if (record->operand_count <= max_count) -+ return; -+ -+ WARN("Ignoring %u extra operands for code %u.\n", record->operand_count - max_count, record->code); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %u extra operands for record code %u.", record->operand_count - max_count, record->code); -+} -+ -+static bool dxil_record_validate_operand_count(const struct dxil_record *record, unsigned int min_count, -+ unsigned int max_count, struct sm6_parser *sm6) -+{ -+ dxil_record_validate_operand_max_count(record, max_count, sm6); -+ return dxil_record_validate_operand_min_count(record, min_count, sm6); -+} -+ -+static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) -+{ -+ const struct dxil_record *record; -+ size_t i, type_count, type_index; -+ const struct dxil_block *block; -+ char *struct_name = NULL; -+ unsigned int j, count; -+ struct sm6_type *type; -+ uint64_t type_id; -+ bool is_unique; -+ -+ sm6->p.location.line = 0; -+ sm6->p.location.column = 0; -+ -+ if (!(block = sm6_parser_get_level_one_block(sm6, TYPE_BLOCK, &is_unique))) -+ { -+ WARN("No type definitions found.\n"); -+ return VKD3D_OK; -+ } -+ if (!is_unique) -+ WARN("Ignoring invalid extra type table(s).\n"); -+ -+ sm6->p.location.line = block->id; -+ -+ type_count = 0; -+ for (i = 0; i < block->record_count; ++i) -+ type_count += block->records[i]->code != TYPE_CODE_NUMENTRY && block->records[i]->code != TYPE_CODE_STRUCT_NAME; -+ -+ /* The type array must not be relocated. 
*/ -+ if (!(sm6->types = vkd3d_calloc(type_count, sizeof(*sm6->types)))) -+ { -+ ERR("Failed to allocate type array.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ for (i = 0; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ record = block->records[i]; -+ -+ type = &sm6->types[sm6->type_count]; -+ type_index = sm6->type_count; -+ -+ switch (record->code) -+ { -+ case TYPE_CODE_ARRAY: -+ case TYPE_CODE_VECTOR: -+ if (!dxil_record_validate_operand_count(record, 2, 2, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ type->class = record->code == TYPE_CODE_ARRAY ? TYPE_CLASS_ARRAY : TYPE_CLASS_VECTOR; -+ -+ if (!(type->u.array.count = record->operands[0])) -+ { -+ TRACE("Setting unbounded for type %zu.\n", type_index); -+ type->u.array.count = UINT_MAX; -+ } -+ -+ if ((type_id = record->operands[1]) >= type_count) -+ { -+ WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.array.elem_type = &sm6->types[type_id]; -+ break; -+ -+ case TYPE_CODE_DOUBLE: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ type->class = TYPE_CLASS_FLOAT; -+ type->u.width = 64; -+ break; -+ -+ case TYPE_CODE_FLOAT: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ type->class = TYPE_CLASS_FLOAT; -+ type->u.width = 32; -+ break; -+ -+ case TYPE_CODE_FUNCTION: -+ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ if (record->operands[0]) -+ FIXME("Unhandled vararg function type %zu.\n", type_index); -+ -+ type->class = TYPE_CLASS_FUNCTION; -+ -+ if ((type_id = record->operands[1]) >= type_count) -+ { -+ WARN("Invalid return type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ count = record->operand_count - 2; -+ if (vkd3d_object_range_overflow(sizeof(type->u.function), count, sizeof(type->u.function->param_types[0])) -+ || !(type->u.function = 
vkd3d_malloc(offsetof(struct sm6_function_info, param_types[count])))) -+ { -+ ERR("Failed to allocate function parameter types.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ type->u.function->ret_type = &sm6->types[type_id]; -+ type->u.function->param_count = count; -+ for (j = 0; j < count; ++j) -+ { -+ if ((type_id = record->operands[j + 2]) >= type_count) -+ { -+ WARN("Invalid parameter type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ vkd3d_free(type->u.function); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.function->param_types[j] = &sm6->types[type_id]; -+ } -+ break; -+ -+ case TYPE_CODE_HALF: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ type->class = TYPE_CLASS_FLOAT; -+ type->u.width = 16; -+ break; -+ -+ case TYPE_CODE_INTEGER: -+ { -+ uint64_t width; -+ -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ type->class = TYPE_CLASS_INTEGER; -+ -+ switch ((width = record->operands[0])) -+ { -+ case 1: -+ case 8: -+ case 16: -+ case 32: -+ case 64: -+ break; -+ default: -+ WARN("Invalid integer width %"PRIu64" for type %zu.\n", width, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.width = width; -+ break; -+ } -+ -+ case TYPE_CODE_LABEL: -+ type->class = TYPE_CLASS_LABEL; -+ break; -+ -+ case TYPE_CODE_METADATA: -+ type->class = TYPE_CLASS_METADATA; -+ break; -+ -+ case TYPE_CODE_NUMENTRY: -+ continue; -+ -+ case TYPE_CODE_POINTER: -+ if (!dxil_record_validate_operand_count(record, 1, 2, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ type->class = TYPE_CLASS_POINTER; -+ -+ if ((type_id = record->operands[0]) >= type_count) -+ { -+ WARN("Invalid pointee type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.pointer.type = &sm6->types[type_id]; -+ type->u.pointer.addr_space = (record->operand_count > 1) ? 
record->operands[1] : ADDRESS_SPACE_DEFAULT; -+ break; -+ -+ case TYPE_CODE_STRUCT_ANON: -+ case TYPE_CODE_STRUCT_NAMED: -+ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ if (record->code == TYPE_CODE_STRUCT_NAMED && !struct_name) -+ { -+ WARN("Missing struct name before struct type %zu.\n", type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ type->class = TYPE_CLASS_STRUCT; -+ -+ count = record->operand_count - 1; -+ if (vkd3d_object_range_overflow(sizeof(type->u.struc), count, sizeof(type->u.struc->elem_types[0])) -+ || !(type->u.struc = vkd3d_malloc(offsetof(struct sm6_struct_info, elem_types[count])))) -+ { -+ ERR("Failed to allocate struct element types.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if (record->operands[0]) -+ FIXME("Ignoring struct packed attribute.\n"); -+ -+ type->u.struc->elem_count = count; -+ for (j = 0; j < count; ++j) -+ { -+ if ((type_id = record->operands[j + 1]) >= type_count) -+ { -+ WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ vkd3d_free(type->u.struc); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.struc->elem_types[j] = &sm6->types[type_id]; -+ } -+ -+ if (record->code == TYPE_CODE_STRUCT_ANON) -+ { -+ type->u.struc->name = NULL; -+ break; -+ } -+ -+ type->u.struc->name = struct_name; -+ struct_name = NULL; -+ break; -+ -+ case TYPE_CODE_STRUCT_NAME: -+ if (!(struct_name = dxil_record_to_string(record, 0))) -+ { -+ ERR("Failed to allocate struct name.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ if (!struct_name[0]) -+ WARN("Struct name is empty for type %zu.\n", type_index); -+ continue; -+ -+ case TYPE_CODE_VOID: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ type->class = TYPE_CLASS_VOID; -+ break; -+ -+ default: -+ FIXME("Unhandled type %u at index %zu.\n", record->code, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ ++sm6->type_count; -+ } -+ -+ assert(sm6->type_count == 
type_count); -+ -+ if (struct_name) -+ { -+ WARN("Unused struct name %s.\n", struct_name); -+ vkd3d_free(struct_name); -+ } -+ -+ return VKD3D_OK; -+} -+ -+static inline bool sm6_type_is_void(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_VOID; -+} -+ -+static inline bool sm6_type_is_integer(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_INTEGER; -+} -+ -+static inline bool sm6_type_is_i8(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_INTEGER && type->u.width == 8; -+} -+ -+static inline bool sm6_type_is_i32(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_INTEGER && type->u.width == 32; -+} -+ -+static inline bool sm6_type_is_floating_point(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_FLOAT; -+} -+ -+static inline bool sm6_type_is_numeric(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_INTEGER || type->class == TYPE_CLASS_FLOAT; -+} -+ -+static inline bool sm6_type_is_pointer(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_POINTER; -+} -+ -+static bool sm6_type_is_numeric_aggregate(const struct sm6_type *type) -+{ -+ unsigned int i; -+ -+ switch (type->class) -+ { -+ case TYPE_CLASS_ARRAY: -+ case TYPE_CLASS_VECTOR: -+ return sm6_type_is_numeric(type->u.array.elem_type); -+ -+ case TYPE_CLASS_STRUCT: -+ /* Do not handle nested structs. Support can be added if they show up. 
*/ -+ for (i = 0; i < type->u.struc->elem_count; ++i) -+ if (!sm6_type_is_numeric(type->u.struc->elem_types[i])) -+ return false; -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+static inline bool sm6_type_is_struct(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_STRUCT; -+} -+ -+static inline bool sm6_type_is_function(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_FUNCTION; -+} -+ -+static inline bool sm6_type_is_function_pointer(const struct sm6_type *type) -+{ -+ return sm6_type_is_pointer(type) && sm6_type_is_function(type->u.pointer.type); -+} -+ -+static inline bool sm6_type_is_handle(const struct sm6_type *type) -+{ -+ return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.Handle"); -+} -+ -+static inline const struct sm6_type *sm6_type_get_element_type(const struct sm6_type *type) -+{ -+ return (type->class == TYPE_CLASS_ARRAY || type->class == TYPE_CLASS_VECTOR) ? type->u.array.elem_type : type; -+} -+ -+static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type *type, -+ enum bitcode_address_space addr_space, struct sm6_parser *sm6) -+{ -+ size_t i, start = type - sm6->types; -+ const struct sm6_type *pointer_type; -+ -+ /* DXC seems usually to place the pointer type immediately after its pointee. */ -+ for (i = (start + 1) % sm6->type_count; i != start; i = (i + 1) % sm6->type_count) -+ { -+ pointer_type = &sm6->types[i]; -+ if (sm6_type_is_pointer(pointer_type) && pointer_type->u.pointer.type == type -+ && pointer_type->u.pointer.addr_space == addr_space) -+ return pointer_type; -+ } -+ -+ return NULL; -+} -+ -+/* Never returns null for elem_idx 0. 
*/ -+static const struct sm6_type *sm6_type_get_scalar_type(const struct sm6_type *type, unsigned int elem_idx) -+{ -+ switch (type->class) -+ { -+ case TYPE_CLASS_ARRAY: -+ case TYPE_CLASS_VECTOR: -+ if (elem_idx >= type->u.array.count) -+ return NULL; -+ return sm6_type_get_scalar_type(type->u.array.elem_type, 0); -+ -+ case TYPE_CLASS_POINTER: -+ return sm6_type_get_scalar_type(type->u.pointer.type, 0); -+ -+ case TYPE_CLASS_STRUCT: -+ if (elem_idx >= type->u.struc->elem_count) -+ return NULL; -+ return sm6_type_get_scalar_type(type->u.struc->elem_types[elem_idx], 0); -+ -+ default: -+ return type; -+ } -+} -+ -+static const struct sm6_type *sm6_parser_get_type(struct sm6_parser *sm6, uint64_t type_id) -+{ -+ if (type_id >= sm6->type_count) -+ { -+ WARN("Invalid type index %"PRIu64" at %zu.\n", type_id, sm6->value_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID, -+ "DXIL type id %"PRIu64" is invalid.", type_id); -+ return NULL; -+ } -+ return &sm6->types[type_id]; -+} -+ -+static int global_symbol_compare(const void *a, const void *b) -+{ -+ return vkd3d_u32_compare(((const struct sm6_symbol *)a)->id, ((const struct sm6_symbol *)b)->id); -+} -+ -+static enum vkd3d_result sm6_parser_symtab_init(struct sm6_parser *sm6) -+{ -+ const struct dxil_record *record; -+ const struct dxil_block *block; -+ struct sm6_symbol *symbol; -+ size_t i, count; -+ bool is_unique; -+ -+ sm6->p.location.line = 0; -+ sm6->p.location.column = 0; -+ -+ if (!(block = sm6_parser_get_level_one_block(sm6, VALUE_SYMTAB_BLOCK, &is_unique))) -+ { -+ /* There should always be at least one symbol: the name of the entry point function. 
*/ -+ WARN("No value symtab block found.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (!is_unique) -+ FIXME("Ignoring extra value symtab block(s).\n"); -+ -+ sm6->p.location.line = block->id; -+ -+ for (i = 0, count = 0; i < block->record_count; ++i) -+ count += block->records[i]->code == VST_CODE_ENTRY; -+ -+ if (!(sm6->global_symbols = vkd3d_calloc(count, sizeof(*sm6->global_symbols)))) -+ { -+ ERR("Failed to allocate global symbols.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ for (i = 0; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ record = block->records[i]; -+ -+ if (record->code != VST_CODE_ENTRY) -+ { -+ FIXME("Unhandled symtab code %u.\n", record->code); -+ continue; -+ } -+ if (!dxil_record_validate_operand_min_count(record, 1, sm6)) -+ continue; -+ -+ symbol = &sm6->global_symbols[sm6->global_symbol_count]; -+ symbol->id = record->operands[0]; -+ if (!(symbol->name = dxil_record_to_string(record, 1))) -+ { -+ ERR("Failed to allocate symbol name.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ ++sm6->global_symbol_count; -+ } -+ -+ sm6->p.location.column = block->record_count; -+ -+ qsort(sm6->global_symbols, sm6->global_symbol_count, sizeof(*sm6->global_symbols), global_symbol_compare); -+ for (i = 1; i < sm6->global_symbol_count; ++i) -+ { -+ if (sm6->global_symbols[i].id == sm6->global_symbols[i - 1].id) -+ { -+ WARN("Invalid duplicate symbol id %u.\n", sm6->global_symbols[i].id); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+static const char *sm6_parser_get_global_symbol_name(const struct sm6_parser *sm6, size_t id) -+{ -+ size_t i, start; -+ -+ /* id == array index is normally true */ -+ i = start = id % sm6->global_symbol_count; -+ do -+ { -+ if (sm6->global_symbols[i].id == id) -+ return sm6->global_symbols[i].name; -+ i = (i + 1) % sm6->global_symbol_count; -+ } while (i != start); -+ -+ return NULL; -+} -+ -+static unsigned int register_get_uint_value(const struct 
vkd3d_shader_register *reg) -+{ -+ if (!register_is_constant(reg) || !data_type_is_integer(reg->data_type)) -+ return UINT_MAX; -+ -+ if (reg->immconst_type == VKD3D_IMMCONST_VEC4) -+ WARN("Returning vec4.x.\n"); -+ -+ if (reg->type == VKD3DSPR_IMMCONST64) -+ { -+ if (reg->u.immconst_uint64[0] > UINT_MAX) -+ FIXME("Truncating 64-bit value.\n"); -+ return reg->u.immconst_uint64[0]; -+ } -+ -+ return reg->u.immconst_uint[0]; -+} -+ -+static inline bool sm6_value_is_function_dcl(const struct sm6_value *value) -+{ -+ return value->value_type == VALUE_TYPE_FUNCTION; -+} -+ -+static inline bool sm6_value_is_dx_intrinsic_dcl(const struct sm6_value *fn) -+{ -+ assert(sm6_value_is_function_dcl(fn)); -+ return fn->u.function.is_prototype && !strncmp(fn->u.function.name, "dx.op.", 6); -+} -+ -+static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_parser *sm6) -+{ -+ assert(sm6->value_count < sm6->value_capacity); -+ return &sm6->values[sm6->value_count]; -+} -+ -+static inline bool sm6_value_is_register(const struct sm6_value *value) -+{ -+ return value->value_type == VALUE_TYPE_REG; -+} -+ -+static inline bool sm6_value_is_constant(const struct sm6_value *value) -+{ -+ return sm6_value_is_register(value) && register_is_constant(&value->u.reg); -+} -+ -+static inline bool sm6_value_is_undef(const struct sm6_value *value) -+{ -+ return sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF; -+} -+ -+static inline unsigned int sm6_value_get_constant_uint(const struct sm6_value *value) -+{ -+ if (!sm6_value_is_constant(value)) -+ return UINT_MAX; -+ return register_get_uint_value(&value->u.reg); -+} -+ -+static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_shader_instruction *ins, -+ unsigned int count, struct sm6_parser *sm6) -+{ -+ struct vkd3d_shader_src_param *params = shader_parser_get_src_params(&sm6->p, count); -+ if (!params) -+ { -+ ERR("Failed to allocate src params.\n"); -+ 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory allocating instruction src paramaters."); -+ return NULL; -+ } -+ ins->src = params; -+ ins->src_count = count; -+ return params; -+} -+ -+static struct vkd3d_shader_dst_param *instruction_dst_params_alloc(struct vkd3d_shader_instruction *ins, -+ unsigned int count, struct sm6_parser *sm6) -+{ -+ struct vkd3d_shader_dst_param *params = shader_parser_get_dst_params(&sm6->p, count); -+ if (!params) -+ { -+ ERR("Failed to allocate dst params.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory allocating instruction dst paramaters."); -+ return NULL; -+ } -+ ins->dst = params; -+ ins->dst_count = count; -+ return params; -+} -+ -+static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type *type) -+{ -+ if (type->class == TYPE_CLASS_INTEGER) -+ { -+ switch (type->u.width) -+ { -+ case 8: -+ return VKD3D_DATA_UINT8; -+ case 32: -+ return VKD3D_DATA_UINT; -+ default: -+ FIXME("Unhandled width %u.\n", type->u.width); -+ return VKD3D_DATA_UINT; -+ } -+ } -+ else if (type->class == TYPE_CLASS_FLOAT) -+ { -+ switch (type->u.width) -+ { -+ case 32: -+ return VKD3D_DATA_FLOAT; -+ case 64: -+ return VKD3D_DATA_DOUBLE; -+ default: -+ FIXME("Unhandled width %u.\n", type->u.width); -+ return VKD3D_DATA_FLOAT; -+ } -+ } -+ -+ FIXME("Unhandled type %u.\n", type->class); -+ return VKD3D_DATA_UINT; -+} -+ -+static inline void dst_param_init_scalar(struct vkd3d_shader_dst_param *param, unsigned int component_idx) -+{ -+ param->write_mask = 1u << component_idx; -+ param->modifiers = 0; -+ param->shift = 0; -+} -+ -+static inline void src_param_init(struct vkd3d_shader_src_param *param) -+{ -+ param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ param->modifiers = VKD3DSPSM_NONE; -+} -+ -+static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src) -+{ -+ src_param_init(param); -+ 
param->reg = src->u.reg; -+} -+ -+static void register_address_init(struct vkd3d_shader_register *reg, const struct sm6_value *address, -+ unsigned int idx, struct sm6_parser *sm6) -+{ -+ assert(idx < ARRAY_SIZE(reg->idx)); -+ if (sm6_value_is_constant(address)) -+ { -+ reg->idx[idx].offset = sm6_value_get_constant_uint(address); -+ } -+ else if (sm6_value_is_undef(address)) -+ { -+ reg->idx[idx].offset = 0; -+ } -+ else -+ { -+ struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&sm6->p, 1); -+ if (rel_addr) -+ src_param_init_from_value(rel_addr, address); -+ reg->idx[idx].offset = 0; -+ reg->idx[idx].rel_addr = rel_addr; -+ } -+} -+ -+/* Recurse through the block tree while maintaining a current value count. The current -+ * count is the sum of the global count plus all declarations within the current function. -+ * Store into value_capacity the highest count seen. */ -+static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, -+ const struct dxil_block *block, size_t value_count) -+{ -+ size_t i, old_value_count = value_count; -+ -+ if (block->id == MODULE_BLOCK) -+ value_count = size_add_with_overflow_check(value_count, dxil_block_compute_module_decl_count(block)); -+ -+ for (i = 0; i < block->child_block_count; ++i) -+ value_count = sm6_parser_compute_max_value_count(sm6, block->child_blocks[i], value_count); -+ -+ switch (block->id) -+ { -+ case CONSTANTS_BLOCK: -+ /* Function local constants are contained in a child block of the function block. */ -+ value_count = size_add_with_overflow_check(value_count, dxil_block_compute_constants_count(block)); -+ break; -+ case FUNCTION_BLOCK: -+ /* A function must start with a block count, which emits no value. This formula is likely to -+ * overestimate the value count somewhat, but this should be no problem. 
*/ -+ value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1); -+ sm6->value_capacity = max(sm6->value_capacity, value_count); -+ sm6->functions[sm6->function_count].value_count = value_count; -+ /* The value count returns to its previous value after handling a function. */ -+ if (value_count < SIZE_MAX) -+ value_count = old_value_count; -+ break; -+ default: -+ break; -+ } -+ -+ return value_count; -+} -+ -+static size_t sm6_parser_get_value_index(struct sm6_parser *sm6, uint64_t idx) -+{ -+ size_t i; -+ -+ /* The value relative index is 32 bits. */ -+ if (idx > UINT32_MAX) -+ WARN("Ignoring upper 32 bits of relative index.\n"); -+ i = (uint32_t)sm6->value_count - (uint32_t)idx; -+ -+ /* This may underflow to produce a forward reference, but it must not exceeed the final value count. */ -+ if (i >= sm6->cur_max_value) -+ { -+ WARN("Invalid value index %"PRIx64" at %zu.\n", idx, sm6->value_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid value relative index %u.", (unsigned int)idx); -+ return SIZE_MAX; -+ } -+ if (i == sm6->value_count) -+ { -+ WARN("Invalid value self-reference at %zu.\n", sm6->value_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Invalid value self-reference."); -+ return SIZE_MAX; -+ } -+ -+ return i; -+} -+ -+static size_t sm6_parser_get_value_idx_by_ref(struct sm6_parser *sm6, const struct dxil_record *record, -+ const struct sm6_type *fwd_type, unsigned int *rec_idx) -+{ -+ unsigned int idx; -+ uint64_t val_ref; -+ size_t operand; -+ -+ idx = *rec_idx; -+ if (!dxil_record_validate_operand_min_count(record, idx + 1, sm6)) -+ return SIZE_MAX; -+ val_ref = record->operands[idx++]; -+ -+ operand = sm6_parser_get_value_index(sm6, val_ref); -+ if (operand == SIZE_MAX) -+ return SIZE_MAX; -+ -+ if (operand >= sm6->value_count) -+ { -+ if (!fwd_type) -+ { -+ /* Forward references are followed by a type id unless an earlier 
operand set the type, -+ * or it is contained in a function declaration. */ -+ if (!dxil_record_validate_operand_min_count(record, idx + 1, sm6)) -+ return SIZE_MAX; -+ if (!(fwd_type = sm6_parser_get_type(sm6, record->operands[idx++]))) -+ return SIZE_MAX; -+ } -+ FIXME("Forward value references are not supported yet.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Unsupported value forward reference."); -+ return SIZE_MAX; -+ } -+ *rec_idx = idx; -+ -+ return operand; -+} -+ -+static const struct sm6_value *sm6_parser_get_value_by_ref(struct sm6_parser *sm6, -+ const struct dxil_record *record, const struct sm6_type *type, unsigned int *rec_idx) -+{ -+ size_t operand = sm6_parser_get_value_idx_by_ref(sm6, record, type, rec_idx); -+ return operand == SIZE_MAX ? NULL : &sm6->values[operand]; -+} -+ -+static bool sm6_parser_declare_function(struct sm6_parser *sm6, const struct dxil_record *record) -+{ -+ const unsigned int max_count = 15; -+ const struct sm6_type *ret_type; -+ struct sm6_value *fn; -+ unsigned int i, j; -+ -+ if (!dxil_record_validate_operand_count(record, 8, max_count, sm6)) -+ return false; -+ -+ fn = sm6_parser_get_current_value(sm6); -+ fn->value_type = VALUE_TYPE_FUNCTION; -+ if (!(fn->u.function.name = sm6_parser_get_global_symbol_name(sm6, sm6->value_count))) -+ { -+ WARN("Missing symbol name for function %zu.\n", sm6->value_count); -+ fn->u.function.name = ""; -+ } -+ -+ if (!(fn->type = sm6_parser_get_type(sm6, record->operands[0]))) -+ return false; -+ if (!sm6_type_is_function(fn->type)) -+ { -+ WARN("Type is not a function.\n"); -+ return false; -+ } -+ ret_type = fn->type->u.function->ret_type; -+ -+ if (!(fn->type = sm6_type_get_pointer_to_type(fn->type, ADDRESS_SPACE_DEFAULT, sm6))) -+ { -+ WARN("Failed to get pointer type for type %u.\n", fn->type->class); -+ return false; -+ } -+ -+ if (record->operands[1]) -+ WARN("Ignoring calling convention %#"PRIx64".\n", record->operands[1]); -+ -+ 
fn->u.function.is_prototype = !!record->operands[2]; -+ -+ if (record->operands[3]) -+ WARN("Ignoring linkage %#"PRIx64".\n", record->operands[3]); -+ -+ if (record->operands[4] > UINT_MAX) -+ WARN("Invalid attributes id %#"PRIx64".\n", record->operands[4]); -+ /* 1-based index. */ -+ if ((fn->u.function.attribs_id = record->operands[4])) -+ TRACE("Ignoring function attributes.\n"); -+ -+ /* These always seem to be zero. */ -+ for (i = 5, j = 0; i < min(record->operand_count, max_count); ++i) -+ j += !!record->operands[i]; -+ if (j) -+ WARN("Ignoring %u operands.\n", j); -+ -+ if (sm6_value_is_dx_intrinsic_dcl(fn) && !sm6_type_is_void(ret_type) && !sm6_type_is_numeric(ret_type) -+ && !sm6_type_is_numeric_aggregate(ret_type) && !sm6_type_is_handle(ret_type)) -+ { -+ WARN("Unexpected return type for dx intrinsic function '%s'.\n", fn->u.function.name); -+ } -+ -+ ++sm6->value_count; -+ -+ return true; -+} -+ -+static inline uint64_t decode_rotated_signed_value(uint64_t value) -+{ -+ if (value != 1) -+ { -+ bool neg = value & 1; -+ value >>= 1; -+ return neg ? 
-value : value; -+ } -+ return value << 63; -+} -+ -+static inline float bitcast_uint64_to_float(uint64_t value) -+{ -+ union -+ { -+ uint32_t uint32_value; -+ float float_value; -+ } u; -+ -+ u.uint32_value = value; -+ return u.float_value; -+} -+ -+static inline double bitcast_uint64_to_double(uint64_t value) -+{ -+ union -+ { -+ uint64_t uint64_value; -+ double double_value; -+ } u; -+ -+ u.uint64_value = value; -+ return u.double_value; -+} -+ -+static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) -+{ -+ enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; -+ const struct sm6_type *type, *elem_type; -+ enum vkd3d_data_type reg_data_type; -+ const struct dxil_record *record; -+ struct sm6_value *dst; -+ size_t i, value_idx; -+ uint64_t value; -+ -+ for (i = 0, type = NULL; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ record = block->records[i]; -+ value_idx = sm6->value_count; -+ -+ if (record->code == CST_CODE_SETTYPE) -+ { -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if (!(type = sm6_parser_get_type(sm6, record->operands[0]))) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ elem_type = sm6_type_get_element_type(type); -+ if (sm6_type_is_numeric(elem_type)) -+ { -+ reg_data_type = vkd3d_data_type_from_sm6_type(elem_type); -+ reg_type = elem_type->u.width > 32 ? 
VKD3DSPR_IMMCONST64 : VKD3DSPR_IMMCONST; -+ } -+ else -+ { -+ reg_data_type = VKD3D_DATA_UNUSED; -+ reg_type = VKD3DSPR_INVALID; -+ } -+ -+ if (i == block->record_count - 1) -+ WARN("Unused SETTYPE record.\n"); -+ -+ continue; -+ } -+ -+ if (!type) -+ { -+ WARN("Constant record %zu has no type.\n", value_idx); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ dst = sm6_parser_get_current_value(sm6); -+ dst->type = type; -+ dst->value_type = VALUE_TYPE_REG; -+ dst->u.reg.type = reg_type; -+ dst->u.reg.immconst_type = VKD3D_IMMCONST_SCALAR; -+ dst->u.reg.data_type = reg_data_type; -+ -+ switch (record->code) -+ { -+ case CST_CODE_NULL: -+ /* Register constant data is already zero-filled. */ -+ break; -+ -+ case CST_CODE_INTEGER: -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if (!sm6_type_is_integer(type)) -+ { -+ WARN("Invalid integer of non-integer type %u at constant idx %zu.\n", type->class, value_idx); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ value = decode_rotated_signed_value(record->operands[0]); -+ if (type->u.width <= 32) -+ dst->u.reg.u.immconst_uint[0] = value & ((1ull << type->u.width) - 1); -+ else -+ dst->u.reg.u.immconst_uint64[0] = value; -+ -+ break; -+ -+ case CST_CODE_FLOAT: -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if (!sm6_type_is_floating_point(type)) -+ { -+ WARN("Invalid float of non-fp type %u at constant idx %zu.\n", type->class, value_idx); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (type->u.width == 16) -+ FIXME("Half float type is not supported yet.\n"); -+ else if (type->u.width == 32) -+ dst->u.reg.u.immconst_float[0] = bitcast_uint64_to_float(record->operands[0]); -+ else if (type->u.width == 64) -+ dst->u.reg.u.immconst_double[0] = bitcast_uint64_to_double(record->operands[0]); -+ else -+ vkd3d_unreachable(); -+ -+ break; -+ -+ case CST_CODE_DATA: -+ WARN("Unhandled constant array.\n"); -+ 
break; -+ -+ case CST_CODE_UNDEF: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ dst->u.reg.type = VKD3DSPR_UNDEF; -+ /* Mark as explicitly undefined, not the result of a missing constant code or instruction. */ -+ dst->is_undefined = true; -+ break; -+ -+ default: -+ FIXME("Unhandled constant code %u.\n", record->code); -+ dst->u.reg.type = VKD3DSPR_UNDEF; -+ break; -+ } -+ -+ ++sm6->value_count; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static struct vkd3d_shader_instruction *sm6_parser_require_space(struct sm6_parser *sm6, size_t extra) -+{ -+ if (!shader_instruction_array_reserve(&sm6->p.instructions, sm6->p.instructions.count + extra)) -+ { -+ ERR("Failed to allocate instruction.\n"); -+ return NULL; -+ } -+ return &sm6->p.instructions.elements[sm6->p.instructions.count]; -+} -+ -+/* Space should be reserved before calling this. It is intended to require no checking of the returned pointer. */ -+static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_parser *sm6, -+ enum vkd3d_shader_opcode handler_idx) -+{ -+ struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); -+ assert(ins); -+ shader_instruction_init(ins, handler_idx); -+ ++sm6->p.instructions.count; -+ return ins; -+} -+ -+static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) -+{ -+ const struct dxil_block *block = &sm6->root_block; -+ const struct dxil_record *record; -+ uint64_t version; -+ size_t i; -+ -+ sm6->p.location.line = block->id; -+ sm6->p.location.column = 0; -+ -+ for (i = 0; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ record = block->records[i]; -+ switch (record->code) -+ { -+ case MODULE_CODE_FUNCTION: -+ if (!sm6_parser_declare_function(sm6, record)) -+ { -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL, -+ "A DXIL function declaration is invalid."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ break; -+ -+ case MODULE_CODE_GLOBALVAR: -+ FIXME("Global 
variables are not implemented yet.\n"); -+ break; -+ -+ case MODULE_CODE_VERSION: -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ if ((version = record->operands[0]) != 1) -+ { -+ FIXME("Unsupported format version %#"PRIx64".\n", version); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT, -+ "Bitcode format version %#"PRIx64" is unsupported.", version); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ break; -+ -+ default: -+ break; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+static void dst_param_io_init(struct vkd3d_shader_dst_param *param, -+ const struct signature_element *e, enum vkd3d_shader_register_type reg_type) -+{ -+ enum vkd3d_shader_component_type component_type; -+ -+ param->write_mask = e->mask; -+ param->modifiers = 0; -+ param->shift = 0; -+ /* DXIL types do not have signedness. Load signed elements as unsigned. */ -+ component_type = e->component_type == VKD3D_SHADER_COMPONENT_INT ? 
VKD3D_SHADER_COMPONENT_UINT : e->component_type; -+ shader_register_init(¶m->reg, reg_type, vkd3d_data_type_from_component_type(component_type), 0); -+} -+ -+static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, -+ enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) -+{ -+ struct vkd3d_shader_dst_param *param; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < s->element_count; ++i) -+ { -+ e = &s->elements[i]; -+ -+ param = ¶ms[i]; -+ dst_param_io_init(param, e, reg_type); -+ param->reg.idx[0].offset = i; -+ param->reg.idx_count = 1; -+ } -+} -+ -+static void sm6_parser_emit_signature(struct sm6_parser *sm6, const struct shader_signature *s, -+ enum vkd3d_shader_opcode handler_idx, enum vkd3d_shader_opcode siv_handler_idx, -+ struct vkd3d_shader_dst_param *params) -+{ -+ struct vkd3d_shader_instruction *ins; -+ struct vkd3d_shader_dst_param *param; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < s->element_count; ++i) -+ { -+ e = &s->elements[i]; -+ -+ /* Do not check e->used_mask because in some cases it is zero for used elements. -+ * TODO: scan ahead for used I/O elements. */ -+ -+ if (e->sysval_semantic != VKD3D_SHADER_SV_NONE && e->sysval_semantic != VKD3D_SHADER_SV_TARGET) -+ { -+ ins = sm6_parser_add_instruction(sm6, siv_handler_idx); -+ param = &ins->declaration.register_semantic.reg; -+ ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); -+ } -+ else -+ { -+ ins = sm6_parser_add_instruction(sm6, handler_idx); -+ param = &ins->declaration.dst; -+ } -+ -+ *param = params[i]; -+ } -+} -+ -+static void sm6_parser_init_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) -+{ -+ sm6_parser_init_signature(sm6, output_signature, -+ (sm6->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL) ? 
VKD3DSPR_COLOROUT : VKD3DSPR_OUTPUT, -+ sm6->output_params); -+} -+ -+static void sm6_parser_emit_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) -+{ -+ sm6_parser_emit_signature(sm6, output_signature, VKD3DSIH_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT_SIV, sm6->output_params); -+} -+ -+static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) -+{ -+ size_t i, count = sm6->function_count; -+ -+ for (i = 0; i < sm6->value_count; ++i) -+ { -+ if (sm6_type_is_function_pointer(sm6->values[i].type) && !sm6->values[i].u.function.is_prototype && !count--) -+ break; -+ } -+ if (i == sm6->value_count) -+ return NULL; -+ -+ ++sm6->function_count; -+ return &sm6->values[i]; -+} -+ -+static struct sm6_block *sm6_block_create() -+{ -+ struct sm6_block *block = vkd3d_calloc(1, sizeof(*block)); -+ return block; -+} -+ -+static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, struct sm6_block *code_block, -+ enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct vkd3d_shader_instruction *ins) -+{ -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_dst_param *dst_param; -+ const struct shader_signature *signature; -+ unsigned int row_index, column_index; -+ const struct signature_element *e; -+ const struct sm6_value *value; -+ -+ row_index = sm6_value_get_constant_uint(operands[0]); -+ column_index = sm6_value_get_constant_uint(operands[2]); -+ -+ signature = &sm6->p.shader_desc.output_signature; -+ if (row_index >= signature->element_count) -+ { -+ WARN("Invalid row index %u.\n", row_index); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid output row index %u.", row_index); -+ return; -+ } -+ e = &signature->elements[row_index]; -+ -+ if (column_index >= VKD3D_VEC4_SIZE) -+ { -+ WARN("Invalid column index %u.\n", column_index); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid output column 
index %u.", column_index); -+ return; -+ } -+ -+ value = operands[3]; -+ if (!sm6_value_is_register(value)) -+ { -+ WARN("Source value is not a register.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Expected store operation source to be a register."); -+ return; -+ } -+ -+ shader_instruction_init(ins, VKD3DSIH_MOV); -+ -+ if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6))) -+ return; -+ dst_param_init_scalar(dst_param, column_index); -+ dst_param->reg = sm6->output_params[row_index].reg; -+ if (e->register_count > 1) -+ register_address_init(&dst_param->reg, operands[1], 0, sm6); -+ -+ if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ src_param_init_from_value(src_param, value); -+} -+ -+struct sm6_dx_opcode_info -+{ -+ const char ret_type; -+ const char *operand_info; -+ void (*handler)(struct sm6_parser *, struct sm6_block *, enum dx_intrinsic_opcode, -+ const struct sm6_value **, struct vkd3d_shader_instruction *); -+}; -+ -+/* -+ 8 -> int8 -+ i -> int32 -+ v -> void -+ o -> overloaded -+ */ -+static const struct sm6_dx_opcode_info sm6_dx_op_table[] = -+{ -+ [DX_STORE_OUTPUT ] = {'v', "ii8o", sm6_parser_emit_dx_store_output}, -+}; -+ -+static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_type *type, char info_type) -+{ -+ switch (info_type) -+ { -+ case 0: -+ FIXME("Invalid operand count.\n"); -+ return false; -+ case '8': -+ return sm6_type_is_i8(type); -+ case 'i': -+ return sm6_type_is_i32(type); -+ case 'v': -+ return !type; -+ case 'o': -+ /* TODO: some type checking may be possible */ -+ return true; -+ default: -+ FIXME("Unhandled operand code '%c'.\n", info_type); -+ return false; -+ } -+} -+ -+static bool sm6_parser_validate_dx_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const char *name, -+ const struct sm6_value **operands, unsigned int operand_count, struct sm6_value *dst) -+{ -+ const struct sm6_dx_opcode_info *info; -+ unsigned int i; -+ 
-+ info = &sm6_dx_op_table[op]; -+ -+ if (!sm6_parser_validate_operand_type(sm6, dst->type, info->ret_type)) -+ { -+ WARN("Failed to validate return type for dx intrinsic id %u, '%s'.\n", op, name); -+ /* Return type validation failure is not so critical. We only need to set -+ * a data type for the SSA result. */ -+ } -+ -+ for (i = 0; i < operand_count; ++i) -+ { -+ const struct sm6_value *value = operands[i]; -+ if (!sm6_value_is_register(value) || !sm6_parser_validate_operand_type(sm6, value->type, info->operand_info[i])) -+ { -+ WARN("Failed to validate operand %u for dx intrinsic id %u, '%s'.\n", i + 1, op, name); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Operand %u for call to dx intrinsic function '%s' is invalid.", i + 1, name); -+ return false; -+ } -+ } -+ if (info->operand_info[operand_count]) -+ { -+ WARN("Missing operands for dx intrinsic id %u, '%s'.\n", op, name); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "Call to dx intrinsic function '%s' has missing operands.", name); -+ return false; -+ } -+ -+ return true; -+} -+ -+static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shader_instruction *ins, -+ struct sm6_value *dst) -+{ -+ const struct sm6_type *type; -+ -+ ins->handler_idx = VKD3DSIH_NOP; -+ -+ if (!dst->type) -+ return; -+ -+ type = sm6_type_get_scalar_type(dst->type, 0); -+ shader_register_init(&dst->u.reg, VKD3DSPR_UNDEF, vkd3d_data_type_from_sm6_type(type), 0); -+ /* dst->is_undefined is not set here because it flags only explicitly undefined values. 
*/ -+} -+ -+static void sm6_parser_decode_dx_op(struct sm6_parser *sm6, struct sm6_block *code_block, enum dx_intrinsic_opcode op, -+ const char *name, const struct sm6_value **operands, unsigned int operand_count, -+ struct vkd3d_shader_instruction *ins, struct sm6_value *dst) -+{ -+ if (op >= ARRAY_SIZE(sm6_dx_op_table) || !sm6_dx_op_table[op].operand_info) -+ { -+ FIXME("Unhandled dx intrinsic function id %u, '%s'.\n", op, name); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNHANDLED_INTRINSIC, -+ "Call to intrinsic function %s is unhandled.", name); -+ sm6_parser_emit_unhandled(sm6, ins, dst); -+ return; -+ } -+ -+ if (sm6_parser_validate_dx_op(sm6, op, name, operands, operand_count, dst)) -+ sm6_dx_op_table[op].handler(sm6, code_block, op, operands, ins); -+ else -+ sm6_parser_emit_unhandled(sm6, ins, dst); -+} -+ -+static void sm6_parser_emit_call(struct sm6_parser *sm6, const struct dxil_record *record, -+ struct sm6_block *code_block, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) -+{ -+ const struct sm6_value *operands[DXIL_OP_MAX_OPERANDS]; -+ const struct sm6_value *fn_value, *op_value; -+ unsigned int i = 1, j, operand_count; -+ const struct sm6_type *type = NULL; -+ uint64_t call_conv; -+ -+ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) -+ return; -+ -+ /* TODO: load the 1-based attributes index from record->operands[0] and validate against attribute count. 
*/ -+ -+ if ((call_conv = record->operands[i++]) & CALL_CONV_FLAG_EXPLICIT_TYPE) -+ type = sm6_parser_get_type(sm6, record->operands[i++]); -+ if (call_conv &= ~CALL_CONV_FLAG_EXPLICIT_TYPE) -+ WARN("Ignoring calling convention %#"PRIx64".\n", call_conv); -+ -+ if (!(fn_value = sm6_parser_get_value_by_ref(sm6, record, NULL, &i))) -+ return; -+ if (!sm6_value_is_function_dcl(fn_value)) -+ { -+ WARN("Function target value is not a function declaration.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Function call target value is not a function declaration."); -+ return; -+ } -+ -+ if (type && type != fn_value->type->u.pointer.type) -+ WARN("Explicit call type does not match function type.\n"); -+ type = fn_value->type->u.pointer.type; -+ -+ if (!sm6_type_is_void(type->u.function->ret_type)) -+ dst->type = type->u.function->ret_type; -+ -+ operand_count = type->u.function->param_count; -+ if (operand_count > ARRAY_SIZE(operands)) -+ { -+ WARN("Ignoring %zu operands.\n", operand_count - ARRAY_SIZE(operands)); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %zu operands for function call.", operand_count - ARRAY_SIZE(operands)); -+ operand_count = ARRAY_SIZE(operands); -+ } -+ -+ for (j = 0; j < operand_count; ++j) -+ { -+ if (!(operands[j] = sm6_parser_get_value_by_ref(sm6, record, type->u.function->param_types[j], &i))) -+ return; -+ } -+ if ((j = record->operand_count - i)) -+ { -+ WARN("Ignoring %u operands beyond the function parameter list.\n", j); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %u function call operands beyond the parameter list.", j); -+ } -+ -+ if (!fn_value->u.function.is_prototype) -+ { -+ FIXME("Unhandled call to local function.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Call to a local function is unsupported."); -+ return; -+ } -+ if 
(!sm6_value_is_dx_intrinsic_dcl(fn_value)) -+ WARN("External function is not a dx intrinsic.\n"); -+ -+ if (!operand_count) -+ { -+ WARN("Missing dx intrinsic function id.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "The id for a dx intrinsic function is missing."); -+ return; -+ } -+ -+ op_value = operands[0]; -+ if (!sm6_value_is_constant(op_value) || !sm6_type_is_integer(op_value->type)) -+ { -+ WARN("dx intrinsic function id is not a constant int.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Expected a constant integer dx intrinsic function id."); -+ return; -+ } -+ sm6_parser_decode_dx_op(sm6, code_block, register_get_uint_value(&op_value->u.reg), -+ fn_value->u.function.name, &operands[1], operand_count - 1, ins, dst); -+} -+ -+static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record *record, -+ struct sm6_block *code_block, struct vkd3d_shader_instruction *ins) -+{ -+ if (!dxil_record_validate_operand_count(record, 0, 1, sm6)) -+ return; -+ -+ if (record->operand_count) -+ FIXME("Non-void return is not implemented.\n"); -+ -+ ins->handler_idx = VKD3DSIH_NOP; -+} -+ -+static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const struct dxil_block *block, -+ struct sm6_function *function) -+{ -+ struct vkd3d_shader_instruction *ins; -+ const struct dxil_record *record; -+ bool ret_found, is_terminator; -+ struct sm6_block *code_block; -+ struct sm6_value *dst; -+ size_t i, block_idx; -+ -+ if (sm6->function_count) -+ { -+ FIXME("Multiple functions are not supported yet.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (!(function->declaration = sm6_parser_next_function_definition(sm6))) -+ { -+ WARN("Failed to find definition to match function body.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (block->record_count < 2) -+ { -+ /* It should contain at least a block count and a RET instruction. 
*/ -+ WARN("Invalid function block record count %zu.\n", block->record_count); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (block->records[0]->code != FUNC_CODE_DECLAREBLOCKS || !block->records[0]->operand_count -+ || block->records[0]->operands[0] > UINT_MAX) -+ { -+ WARN("Block count declaration not found or invalid.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!(function->block_count = block->records[0]->operands[0])) -+ { -+ WARN("Function contains no blocks.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (function->block_count > 1) -+ { -+ FIXME("Branched shaders are not supported yet.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!(function->blocks[0] = sm6_block_create())) -+ { -+ ERR("Failed to allocate code block.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ code_block = function->blocks[0]; -+ -+ sm6->cur_max_value = function->value_count; -+ -+ for (i = 1, block_idx = 0, ret_found = false; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ -+ if (!code_block) -+ { -+ WARN("Invalid block count %zu.\n", function->block_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid block count %zu.", function->block_count); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ /* block->record_count - 1 is the instruction count, but some instructions -+ * can emit >1 IR instruction, so extra may be used. 
*/ -+ if (!vkd3d_array_reserve((void **)&code_block->instructions, &code_block->instruction_capacity, -+ max(code_block->instruction_count + 1, block->record_count), sizeof(*code_block->instructions))) -+ { -+ ERR("Failed to allocate instructions.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ ins = &code_block->instructions[code_block->instruction_count]; -+ ins->handler_idx = VKD3DSIH_INVALID; -+ -+ dst = sm6_parser_get_current_value(sm6); -+ dst->type = NULL; -+ dst->value_type = VALUE_TYPE_REG; -+ is_terminator = false; -+ -+ record = block->records[i]; -+ switch (record->code) -+ { -+ case FUNC_CODE_INST_CALL: -+ sm6_parser_emit_call(sm6, record, code_block, ins, dst); -+ break; -+ case FUNC_CODE_INST_RET: -+ sm6_parser_emit_ret(sm6, record, code_block, ins); -+ is_terminator = true; -+ ret_found = true; -+ break; -+ default: -+ FIXME("Unhandled dxil instruction %u.\n", record->code); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (sm6->p.failed) -+ return VKD3D_ERROR; -+ assert(ins->handler_idx != VKD3DSIH_INVALID); -+ -+ if (is_terminator) -+ { -+ ++block_idx; -+ code_block = (block_idx < function->block_count) ? 
function->blocks[block_idx] : NULL; -+ } -+ if (code_block) -+ code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; -+ else -+ assert(ins->handler_idx == VKD3DSIH_NOP); -+ -+ sm6->value_count += !!dst->type; -+ } -+ -+ if (!ret_found) -+ { -+ WARN("Function contains no RET instruction.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static bool sm6_block_emit_instructions(struct sm6_block *block, struct sm6_parser *sm6) -+{ -+ struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, block->instruction_count + 1); -+ -+ if (!ins) -+ return false; -+ -+ memcpy(ins, block->instructions, block->instruction_count * sizeof(*block->instructions)); -+ sm6->p.instructions.count += block->instruction_count; -+ -+ sm6_parser_add_instruction(sm6, VKD3DSIH_RET); -+ -+ return true; -+} -+ -+static enum vkd3d_result sm6_parser_module_init(struct sm6_parser *sm6, const struct dxil_block *block, -+ unsigned int level) -+{ -+ size_t i, old_value_count = sm6->value_count; -+ struct sm6_function *function; -+ enum vkd3d_result ret; -+ -+ for (i = 0; i < block->child_block_count; ++i) -+ { -+ if ((ret = sm6_parser_module_init(sm6, block->child_blocks[i], level + 1)) < 0) -+ return ret; -+ } -+ -+ sm6->p.location.line = block->id; -+ sm6->p.location.column = 0; -+ -+ switch (block->id) -+ { -+ case CONSTANTS_BLOCK: -+ function = &sm6->functions[sm6->function_count]; -+ sm6->cur_max_value = function->value_count; -+ return sm6_parser_constants_init(sm6, block); -+ -+ case FUNCTION_BLOCK: -+ function = &sm6->functions[sm6->function_count]; -+ if ((ret = sm6_parser_function_init(sm6, block, function)) < 0) -+ return ret; -+ /* The value index returns to its previous value after handling a function. It's usually nonzero -+ * at the start because of global constants/variables/function declarations. Function constants -+ * occur in a child block, so value_count is already saved before they are emitted. 
*/ -+ memset(&sm6->values[old_value_count], 0, (sm6->value_count - old_value_count) * sizeof(*sm6->values)); -+ sm6->value_count = old_value_count; -+ break; -+ -+ case BLOCKINFO_BLOCK: -+ case MODULE_BLOCK: -+ case PARAMATTR_BLOCK: -+ case PARAMATTR_GROUP_BLOCK: -+ case VALUE_SYMTAB_BLOCK: -+ case METADATA_BLOCK: -+ case METADATA_ATTACHMENT_BLOCK: -+ case TYPE_BLOCK: -+ break; -+ -+ default: -+ FIXME("Unhandled block id %u.\n", block->id); -+ break; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static void sm6_type_table_cleanup(struct sm6_type *types, size_t count) -+{ -+ size_t i; -+ -+ if (!types) -+ return; -+ -+ for (i = 0; i < count; ++i) -+ { -+ switch (types[i].class) -+ { -+ case TYPE_CLASS_STRUCT: -+ vkd3d_free((void *)types[i].u.struc->name); -+ vkd3d_free(types[i].u.struc); -+ break; -+ case TYPE_CLASS_FUNCTION: -+ vkd3d_free(types[i].u.function); -+ break; -+ default: -+ break; -+ } -+ } -+ -+ vkd3d_free(types); -+} -+ -+static void sm6_symtab_cleanup(struct sm6_symbol *symbols, size_t count) -+{ -+ size_t i; -+ -+ for (i = 0; i < count; ++i) -+ vkd3d_free((void *)symbols[i].name); -+ vkd3d_free(symbols); -+} -+ -+static void sm6_block_destroy(struct sm6_block *block) -+{ -+ vkd3d_free(block->instructions); -+ vkd3d_free(block); -+} -+ -+static void sm6_functions_cleanup(struct sm6_function *functions, size_t count) -+{ -+ size_t i, j; -+ -+ for (i = 0; i < count; ++i) -+ { -+ for (j = 0; j < functions[i].block_count; ++j) -+ sm6_block_destroy(functions[i].blocks[j]); -+ } -+ vkd3d_free(functions); -+} -+ -+static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) -+{ -+ struct sm6_parser *sm6 = sm6_parser(parser); -+ -+ dxil_block_destroy(&sm6->root_block); -+ dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); -+ shader_instruction_array_destroy(&parser->instructions); -+ sm6_type_table_cleanup(sm6->types, sm6->type_count); -+ sm6_symtab_cleanup(sm6->global_symbols, sm6->global_symbol_count); -+ sm6_functions_cleanup(sm6->functions, 
sm6->function_count); -+ vkd3d_free(sm6->values); -+ free_shader_desc(&parser->shader_desc); -+ vkd3d_free(sm6); -+} -+ -+static const struct vkd3d_shader_parser_ops sm6_parser_ops = -+{ -+ .parser_destroy = sm6_parser_destroy, -+}; -+ -+static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, -+ const char *source_name, struct vkd3d_shader_message_context *message_context) -+{ -+ const struct shader_signature *output_signature = &sm6->p.shader_desc.output_signature; -+ const struct vkd3d_shader_location location = {.source_name = source_name}; -+ uint32_t version_token, dxil_version, token_count, magic; -+ unsigned int chunk_offset, chunk_size; -+ size_t count, length, function_count; -+ enum bitcode_block_abbreviation abbr; -+ struct vkd3d_shader_version version; -+ struct dxil_block *block; -+ enum vkd3d_result ret; -+ unsigned int i; -+ -+ count = byte_code_size / sizeof(*byte_code); -+ if (count < 6) -+ { -+ WARN("Invalid data size %zu.\n", byte_code_size); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE, -+ "DXIL chunk size %zu is smaller than the DXIL header size.", byte_code_size); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ version_token = byte_code[0]; -+ TRACE("Compiler version: 0x%08x.\n", version_token); -+ token_count = byte_code[1]; -+ TRACE("Token count: %u.\n", token_count); -+ -+ if (token_count < 6 || count < token_count) -+ { -+ WARN("Invalid token count %u (word count %zu).\n", token_count, count); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, -+ "DXIL chunk token count %#x is invalid (word count %zu).", token_count, count); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (byte_code[2] != TAG_DXIL) -+ WARN("Unknown magic number 0x%08x.\n", byte_code[2]); -+ -+ dxil_version = byte_code[3]; -+ if (dxil_version > 0x102) -+ WARN("Unknown DXIL version: 0x%08x.\n", dxil_version); -+ else -+ 
TRACE("DXIL version: 0x%08x.\n", dxil_version); -+ -+ chunk_offset = byte_code[4]; -+ if (chunk_offset < 16 || chunk_offset >= byte_code_size) -+ { -+ WARN("Invalid bitcode chunk offset %#x (data size %zu).\n", chunk_offset, byte_code_size); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET, -+ "DXIL bitcode chunk has invalid offset %#x (data size %#zx).", chunk_offset, byte_code_size); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ chunk_size = byte_code[5]; -+ if (chunk_size > byte_code_size - chunk_offset) -+ { -+ WARN("Invalid bitcode chunk size %#x (data size %zu, chunk offset %#x).\n", -+ chunk_size, byte_code_size, chunk_offset); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, -+ "DXIL bitcode chunk has invalid size %#x (data size %#zx, chunk offset %#x).", -+ chunk_size, byte_code_size, chunk_offset); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ sm6->start = (const uint32_t *)((const char*)&byte_code[2] + chunk_offset); -+ if ((magic = sm6->start[0]) != BITCODE_MAGIC) -+ { -+ WARN("Unknown magic number 0x%08x.\n", magic); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER, -+ "DXIL bitcode chunk magic number 0x%08x is not the expected 0x%08x.", magic, BITCODE_MAGIC); -+ } -+ -+ sm6->end = &sm6->start[(chunk_size + sizeof(*sm6->start) - 1) / sizeof(*sm6->start)]; -+ -+ if ((version.type = version_token >> 16) >= VKD3D_SHADER_TYPE_COUNT) -+ { -+ FIXME("Unknown shader type %#x.\n", version.type); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE, -+ "Unknown shader type %#x.", version.type); -+ } -+ -+ version.major = VKD3D_SM6_VERSION_MAJOR(version_token); -+ version.minor = VKD3D_SM6_VERSION_MINOR(version_token); -+ -+ if ((abbr = sm6->start[1] & 3) != ENTER_SUBBLOCK) -+ { -+ WARN("Initial block abbreviation %u is not ENTER_SUBBLOCK.\n", abbr); -+ vkd3d_shader_error(message_context, &location, 
VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, -+ "DXIL bitcode chunk has invalid initial block abbreviation %u.", abbr); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ /* Estimate instruction count to avoid reallocation in most shaders. */ -+ count = max(token_count, 400) - 400; -+ vkd3d_shader_parser_init(&sm6->p, message_context, source_name, &version, &sm6_parser_ops, -+ (count + (count >> 2)) / 2u + 10); -+ sm6->ptr = &sm6->start[1]; -+ sm6->bitpos = 2; -+ -+ block = &sm6->root_block; -+ if ((ret = dxil_block_init(block, NULL, sm6)) < 0) -+ { -+ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory parsing DXIL bitcode chunk."); -+ else if (ret == VKD3D_ERROR_INVALID_SHADER) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, -+ "DXIL bitcode chunk has invalid bitcode."); -+ else -+ vkd3d_unreachable(); -+ return ret; -+ } -+ -+ dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); -+ sm6->abbrevs = NULL; -+ sm6->abbrev_count = 0; -+ -+ length = sm6->ptr - sm6->start - block->start; -+ if (length != block->length) -+ { -+ WARN("Invalid block length %zu; expected %u.\n", length, block->length); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH, -+ "Root block ends with length %zu but indicated length is %u.", length, block->length); -+ } -+ if (sm6->ptr != sm6->end) -+ { -+ size_t expected_length = sm6->end - sm6->start; -+ length = sm6->ptr - sm6->start; -+ WARN("Invalid module length %zu; expected %zu.\n", length, expected_length); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH, -+ "Module ends with length %zu but indicated length is %zu.", length, expected_length); -+ } -+ -+ if ((ret = sm6_parser_type_table_init(sm6)) < 0) -+ { -+ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) -+ vkd3d_shader_error(message_context, &location, 
VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory parsing DXIL type table."); -+ else if (ret == VKD3D_ERROR_INVALID_SHADER) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE, -+ "DXIL type table is invalid."); -+ else -+ vkd3d_unreachable(); -+ return ret; -+ } -+ -+ if ((ret = sm6_parser_symtab_init(sm6)) < 0) -+ { -+ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory parsing DXIL value symbol table."); -+ else if (ret == VKD3D_ERROR_INVALID_SHADER) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB, -+ "DXIL value symbol table is invalid."); -+ else -+ vkd3d_unreachable(); -+ return ret; -+ } -+ -+ if (!(sm6->output_params = shader_parser_get_dst_params(&sm6->p, output_signature->element_count))) -+ { -+ ERR("Failed to allocate output parameters.\n"); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory allocating output parameters."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ function_count = dxil_block_compute_function_count(&sm6->root_block); -+ if (!(sm6->functions = vkd3d_calloc(function_count, sizeof(*sm6->functions)))) -+ { -+ ERR("Failed to allocate function array.\n"); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory allocating DXIL function array."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if (sm6_parser_compute_max_value_count(sm6, &sm6->root_block, 0) == SIZE_MAX) -+ { -+ WARN("Value array count overflowed.\n"); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "Overflow occurred in the DXIL module value count."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (!(sm6->values = vkd3d_calloc(sm6->value_capacity, sizeof(*sm6->values)))) -+ { -+ ERR("Failed to allocate value array.\n"); -+ 
vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory allocating DXIL value array."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if ((ret = sm6_parser_globals_init(sm6)) < 0) -+ { -+ WARN("Failed to load global declarations.\n"); -+ return ret; -+ } -+ -+ sm6_parser_init_output_signature(sm6, output_signature); -+ -+ if ((ret = sm6_parser_module_init(sm6, &sm6->root_block, 0)) < 0) -+ { -+ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory parsing DXIL module."); -+ else if (ret == VKD3D_ERROR_INVALID_SHADER) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "DXIL module is invalid."); -+ return ret; -+ } -+ -+ if (!sm6_parser_require_space(sm6, output_signature->element_count)) -+ { -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory emitting shader signature declarations."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ sm6_parser_emit_output_signature(sm6, output_signature); -+ -+ for (i = 0; i < sm6->function_count; ++i) -+ { -+ if (!sm6_block_emit_instructions(sm6->functions[i].blocks[0], sm6)) -+ { -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory emitting shader instructions."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ } -+ -+ dxil_block_destroy(&sm6->root_block); -+ -+ return VKD3D_OK; -+} -+ -+int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) -+{ -+ struct vkd3d_shader_desc *shader_desc; -+ uint32_t *byte_code = NULL; -+ struct sm6_parser *sm6; -+ int ret; -+ -+ ERR("Creating a DXIL parser. 
This is unsupported; you get to keep all the pieces if it breaks.\n"); -+ -+ if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6)))) -+ { -+ ERR("Failed to allocate parser.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ shader_desc = &sm6->p.shader_desc; -+ shader_desc->is_dxil = true; -+ if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, -+ shader_desc)) < 0) -+ { -+ WARN("Failed to extract shader, vkd3d result %d.\n", ret); -+ vkd3d_free(sm6); -+ return ret; -+ } -+ -+ sm6->p.shader_desc = *shader_desc; -+ shader_desc = &sm6->p.shader_desc; -+ -+ if (((uintptr_t)shader_desc->byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) -+ { -+ /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC -+ * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ -+ if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) -+ ERR("Failed to allocate aligned chunk. Unaligned access will occur.\n"); -+ else -+ memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); -+ } -+ -+ ret = sm6_parser_init(sm6, byte_code ? byte_code : shader_desc->byte_code, shader_desc->byte_code_size, -+ compile_info->source_name, message_context); -+ vkd3d_free(byte_code); -+ -+ if (ret < 0) -+ { -+ WARN("Failed to initialise shader parser.\n"); -+ sm6_parser_destroy(&sm6->p); -+ return ret; -+ } -+ -+ *parser = &sm6->p; -+ -+ return ret; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index ba5bcfbfaf0..5fe9047bf25 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -72,6 +72,27 @@ void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, c - ctx->result = VKD3D_ERROR_NOT_IMPLEMENTED; - } - -+char *hlsl_sprintf_alloc(struct hlsl_ctx *ctx, const char *fmt, ...) 
-+{ -+ struct vkd3d_string_buffer *string; -+ va_list args; -+ char *ret; -+ -+ if (!(string = hlsl_get_string_buffer(ctx))) -+ return NULL; -+ va_start(args, fmt); -+ if (vkd3d_string_buffer_vprintf(string, fmt, args) < 0) -+ { -+ va_end(args); -+ hlsl_release_string_buffer(ctx, string); -+ return NULL; -+ } -+ va_end(args); -+ ret = hlsl_strdup(ctx, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return ret; -+} -+ - bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var) - { - struct hlsl_scope *scope = ctx->cur_scope; -@@ -430,6 +451,51 @@ struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl - return type; - } - -+unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, -+ enum hlsl_regset regset, unsigned int index) -+{ -+ struct hlsl_type *next_type; -+ unsigned int offset = 0; -+ unsigned int idx; -+ -+ while (!type_is_single_component(type)) -+ { -+ next_type = type; -+ idx = traverse_path_from_component_index(ctx, &next_type, &index); -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ if (regset == HLSL_REGSET_NUMERIC) -+ offset += idx; -+ break; -+ -+ case HLSL_CLASS_STRUCT: -+ offset += type->e.record.fields[idx].reg_offset[regset]; -+ break; -+ -+ case HLSL_CLASS_ARRAY: -+ if (regset == HLSL_REGSET_NUMERIC) -+ offset += idx * align(type->e.array.type->reg_size[regset], 4); -+ else -+ offset += idx * type->e.array.type->reg_size[regset]; -+ break; -+ -+ case HLSL_CLASS_OBJECT: -+ assert(idx == 0); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ type = next_type; -+ } -+ -+ return offset; -+} -+ - static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, - unsigned int path_len) - { -@@ -524,7 +590,9 @@ struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de - unsigned int i; - - assert(deref); -- 
assert(!deref->offset.node); -+ -+ if (deref->offset.node) -+ return deref->data_type; - - type = deref->var->data_type; - for (i = 0; i < deref->path_len; ++i) -@@ -626,6 +694,7 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba - type->e.array.type = basic_type; - type->dimx = basic_type->dimx; - type->dimy = basic_type->dimy; -+ type->sampler_dim = basic_type->sampler_dim; - hlsl_type_calculate_reg_size(ctx, type); - - list_add_tail(&ctx->types, &type->entry); -@@ -991,21 +1060,31 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem - { - struct vkd3d_string_buffer *string; - struct hlsl_ir_var *var; -- static LONG counter; -- const char *name; - - if (!(string = hlsl_get_string_buffer(ctx))) - return NULL; -- vkd3d_string_buffer_printf(string, "<%s-%u>", template, InterlockedIncrement(&counter)); -- if (!(name = hlsl_strdup(ctx, string->buffer))) -- { -- hlsl_release_string_buffer(ctx, string); -- return NULL; -- } -- var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); -+ vkd3d_string_buffer_printf(string, "<%s-%u>", template, ctx->internal_name_counter++); -+ var = hlsl_new_synthetic_var_named(ctx, string->buffer, type, loc, true); - hlsl_release_string_buffer(ctx, string); -+ return var; -+} -+ -+struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, -+ struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope) -+{ -+ struct hlsl_ir_var *var; -+ const char *name_copy; -+ -+ if (!(name_copy = hlsl_strdup(ctx, name))) -+ return NULL; -+ var = hlsl_new_var(ctx, name_copy, type, loc, NULL, 0, NULL); - if (var) -- list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); -+ { -+ if (dummy_scope) -+ list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); -+ else -+ list_add_tail(&ctx->globals->vars, &var->scope_entry); -+ } - return var; - } - -@@ -1432,7 +1511,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct 
hlsl_ir_node *v - } - - struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, -- const struct vkd3d_shader_location *loc) -+ struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_jump *jump; - -@@ -1440,6 +1519,7 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type - return NULL; - init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); - jump->type = type; -+ hlsl_src_from_node(&jump->condition, condition); - return &jump->node; - } - -@@ -1484,7 +1564,7 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, - hlsl_block_cleanup(dst_block); - return false; - } -- list_add_tail(&dst_block->instrs, &dst->entry); -+ hlsl_block_add_instr(dst_block, dst); - - if (!list_empty(&src->uses)) - { -@@ -1585,9 +1665,9 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma - return dst; - } - --static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) -+static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_jump *src) - { -- return hlsl_new_jump(ctx, src->type, &src->node.loc); -+ return hlsl_new_jump(ctx, src->type, map_instr(map, src->condition.node), &src->node.loc); - } - - static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) -@@ -1728,7 +1808,7 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, - return clone_index(ctx, map, hlsl_ir_index(instr)); - - case HLSL_IR_JUMP: -- return clone_jump(ctx, hlsl_ir_jump(instr)); -+ return clone_jump(ctx, map, hlsl_ir_jump(instr)); - - case HLSL_IR_LOAD: - return clone_load(ctx, map, hlsl_ir_load(instr)); -@@ -2065,6 +2145,31 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - } - } - -+struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, -+ unsigned 
int index) -+{ -+ struct hlsl_type *type = var->data_type, *current_type; -+ struct vkd3d_string_buffer *buffer; -+ unsigned int element_index; -+ -+ if (!(buffer = hlsl_get_string_buffer(ctx))) -+ return NULL; -+ -+ vkd3d_string_buffer_printf(buffer, "%s", var->name); -+ -+ while (!type_is_single_component(type)) -+ { -+ current_type = type; -+ element_index = traverse_path_from_component_index(ctx, &type, &index); -+ if (current_type->class == HLSL_CLASS_STRUCT) -+ vkd3d_string_buffer_printf(buffer, ".%s", current_type->e.record.fields[element_index].name); -+ else -+ vkd3d_string_buffer_printf(buffer, "[%u]", element_index); -+ } -+ -+ return buffer; -+} -+ - const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) - { - struct vkd3d_string_buffer *string; -@@ -2123,18 +2228,18 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) - { - static const char * const names[] = - { -- "HLSL_IR_CALL", -- "HLSL_IR_CONSTANT", -- "HLSL_IR_EXPR", -- "HLSL_IR_IF", -- "HLSL_IR_INDEX", -- "HLSL_IR_LOAD", -- "HLSL_IR_LOOP", -- "HLSL_IR_JUMP", -- "HLSL_IR_RESOURCE_LOAD", -- "HLSL_IR_RESOURCE_STORE", -- "HLSL_IR_STORE", -- "HLSL_IR_SWIZZLE", -+ [HLSL_IR_CALL ] = "HLSL_IR_CALL", -+ [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", -+ [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", -+ [HLSL_IR_IF ] = "HLSL_IR_IF", -+ [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", -+ [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", -+ [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", -+ [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", -+ [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", -+ [HLSL_IR_RESOURCE_STORE] = "HLSL_IR_RESOURCE_STORE", -+ [HLSL_IR_STORE ] = "HLSL_IR_STORE", -+ [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", - }; - - if (type >= ARRAY_SIZE(names)) -@@ -2146,10 +2251,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) - { - static const char * const names[] = - { -- "HLSL_IR_JUMP_BREAK", -- "HLSL_IR_JUMP_CONTINUE", -- "HLSL_IR_JUMP_DISCARD", -- "HLSL_IR_JUMP_RETURN", -+ [HLSL_IR_JUMP_BREAK] = 
"HLSL_IR_JUMP_BREAK", -+ [HLSL_IR_JUMP_CONTINUE] = "HLSL_IR_JUMP_CONTINUE", -+ [HLSL_IR_JUMP_DISCARD_NEG] = "HLSL_IR_JUMP_DISCARD_NEG", -+ [HLSL_IR_JUMP_DISCARD_NZ] = "HLSL_IR_JUMP_DISCARD_NZ", -+ [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", - }; - - assert(type < ARRAY_SIZE(names)); -@@ -2158,11 +2264,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) - - static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr); - --static void dump_instr_list(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct list *list) -+static void dump_block(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_block *block) - { - struct hlsl_ir_node *instr; - -- LIST_FOR_EACH_ENTRY(instr, list, struct hlsl_ir_node, entry) -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - dump_instr(ctx, buffer, instr); - vkd3d_string_buffer_printf(buffer, "\n"); -@@ -2337,7 +2443,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP1_COS] = "cos", - [HLSL_OP1_COS_REDUCED] = "cos_reduced", - [HLSL_OP1_DSX] = "dsx", -+ [HLSL_OP1_DSX_COARSE] = "dsx_coarse", -+ [HLSL_OP1_DSX_FINE] = "dsx_fine", - [HLSL_OP1_DSY] = "dsy", -+ [HLSL_OP1_DSY_COARSE] = "dsy_coarse", -+ [HLSL_OP1_DSY_FINE] = "dsy_fine", - [HLSL_OP1_EXP2] = "exp2", - [HLSL_OP1_FRACT] = "fract", - [HLSL_OP1_LOG2] = "log2", -@@ -2376,7 +2486,8 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP2_RSHIFT] = ">>", - - [HLSL_OP3_DP2ADD] = "dp2add", -- [HLSL_OP3_LERP] = "lerp", -+ [HLSL_OP3_MOVC] = "movc", -+ [HLSL_OP3_TERNARY] = "ternary", - }; - - return op_names[op]; -@@ -2400,9 +2511,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - vkd3d_string_buffer_printf(buffer, "if ("); - dump_src(buffer, &if_node->condition); - vkd3d_string_buffer_printf(buffer, ") {\n"); -- dump_instr_list(ctx, buffer, &if_node->then_block.instrs); -+ 
dump_block(ctx, buffer, &if_node->then_block); - vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); -- dump_instr_list(ctx, buffer, &if_node->else_block.instrs); -+ dump_block(ctx, buffer, &if_node->else_block); - vkd3d_string_buffer_printf(buffer, " %10s }", ""); - } - -@@ -2418,8 +2529,12 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i - vkd3d_string_buffer_printf(buffer, "continue"); - break; - -- case HLSL_IR_JUMP_DISCARD: -- vkd3d_string_buffer_printf(buffer, "discard"); -+ case HLSL_IR_JUMP_DISCARD_NEG: -+ vkd3d_string_buffer_printf(buffer, "discard_neg"); -+ break; -+ -+ case HLSL_IR_JUMP_DISCARD_NZ: -+ vkd3d_string_buffer_printf(buffer, "discard_nz"); - break; - - case HLSL_IR_JUMP_RETURN: -@@ -2431,7 +2546,7 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i - static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop) - { - vkd3d_string_buffer_printf(buffer, "for (;;) {\n"); -- dump_instr_list(ctx, buffer, &loop->body.instrs); -+ dump_block(ctx, buffer, &loop->body); - vkd3d_string_buffer_printf(buffer, " %10s }", ""); - } - -@@ -2450,6 +2565,8 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru - [HLSL_RESOURCE_GATHER_GREEN] = "gather_green", - [HLSL_RESOURCE_GATHER_BLUE] = "gather_blue", - [HLSL_RESOURCE_GATHER_ALPHA] = "gather_alpha", -+ [HLSL_RESOURCE_SAMPLE_INFO] = "sample_info", -+ [HLSL_RESOURCE_RESINFO] = "resinfo", - }; - - assert(load->load_type < ARRAY_SIZE(type_names)); -@@ -2457,8 +2574,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru - dump_deref(buffer, &load->resource); - vkd3d_string_buffer_printf(buffer, ", sampler = "); - dump_deref(buffer, &load->sampler); -- vkd3d_string_buffer_printf(buffer, ", coords = "); -- dump_src(buffer, &load->coords); -+ if (load->coords.node) -+ { -+ vkd3d_string_buffer_printf(buffer, ", coords = "); -+ 
dump_src(buffer, &load->coords); -+ } - if (load->sample_index.node) - { - vkd3d_string_buffer_printf(buffer, ", sample index = "); -@@ -2614,7 +2734,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl - vkd3d_string_buffer_printf(&buffer, "\n"); - } - if (func->has_body) -- dump_instr_list(ctx, &buffer, &func->body.instrs); -+ dump_block(ctx, &buffer, &func->body); - - vkd3d_string_buffer_trace(&buffer); - vkd3d_string_buffer_cleanup(&buffer); -@@ -2703,6 +2823,7 @@ static void free_ir_if(struct hlsl_ir_if *if_node) - - static void free_ir_jump(struct hlsl_ir_jump *jump) - { -+ hlsl_src_remove(&jump->condition); - vkd3d_free(jump); - } - -@@ -2822,7 +2943,7 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) - - for (i = 0; i < attr->args_count; ++i) - hlsl_src_remove(&attr->args[i]); -- hlsl_free_instr_list(&attr->instrs); -+ hlsl_block_cleanup(&attr->instrs); - vkd3d_free((void *)attr->name); - vkd3d_free(attr); - } -@@ -2868,6 +2989,16 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function - struct hlsl_ir_function *func; - struct rb_entry *func_entry; - -+ if (ctx->internal_func_name) -+ { -+ char *internal_name; -+ -+ if (!(internal_name = hlsl_strdup(ctx, ctx->internal_func_name))) -+ return; -+ vkd3d_free(name); -+ name = internal_name; -+ } -+ - func_entry = rb_get(&ctx->functions, name); - if (func_entry) - { -@@ -3127,8 +3258,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - - for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) - { -- unsigned int n_variants = 0; - const char *const *variants; -+ unsigned int n_variants; - - switch (bt) - { -@@ -3148,6 +3279,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - break; - - default: -+ n_variants = 0; -+ variants = NULL; - break; - } - -@@ -3199,9 +3332,11 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - } - } - --static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, -+static bool 
hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info *compile_info, - const struct hlsl_profile_info *profile, struct vkd3d_shader_message_context *message_context) - { -+ unsigned int i; -+ - memset(ctx, 0, sizeof(*ctx)); - - ctx->profile = profile; -@@ -3210,7 +3345,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, - - if (!(ctx->source_files = hlsl_alloc(ctx, sizeof(*ctx->source_files)))) - return false; -- if (!(ctx->source_files[0] = hlsl_strdup(ctx, source_name ? source_name : ""))) -+ if (!(ctx->source_files[0] = hlsl_strdup(ctx, compile_info->source_name ? compile_info->source_name : ""))) - { - vkd3d_free(ctx->source_files); - return false; -@@ -3249,6 +3384,19 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, - return false; - ctx->cur_buffer = ctx->globals_buffer; - -+ for (i = 0; i < compile_info->option_count; ++i) -+ { -+ const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; -+ -+ if (option->name == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER) -+ { -+ if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) -+ ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; -+ else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) -+ ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; -+ } -+ } -+ - return true; - } - -@@ -3260,6 +3408,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - struct hlsl_type *type, *next_type; - unsigned int i; - -+ hlsl_block_cleanup(&ctx->static_initializers); -+ - for (i = 0; i < ctx->source_files_count; ++i) - vkd3d_free((void *)ctx->source_files[i]); - vkd3d_free(ctx->source_files); -@@ -3283,6 +3433,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - vkd3d_free((void *)buffer->name); - vkd3d_free(buffer); - } -+ -+ vkd3d_free(ctx->constant_defs.regs); - } - - int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, -@@ 
-3324,7 +3476,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d - return VKD3D_ERROR_INVALID_ARGUMENT; - } - -- if (!hlsl_ctx_init(&ctx, compile_info->source_name, profile, message_context)) -+ if (!hlsl_ctx_init(&ctx, compile_info, profile, message_context)) - return VKD3D_ERROR_OUT_OF_MEMORY; - - if ((ret = hlsl_lexer_compile(&ctx, hlsl)) == 2) -@@ -3378,3 +3530,44 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d - hlsl_ctx_cleanup(&ctx); - return ret; - } -+ -+struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl) -+{ -+ const struct hlsl_ir_function_decl *saved_cur_function = ctx->cur_function; -+ struct vkd3d_shader_code code = {.code = hlsl, .size = strlen(hlsl)}; -+ const char *saved_internal_func_name = ctx->internal_func_name; -+ struct vkd3d_string_buffer *internal_name; -+ struct hlsl_ir_function_decl *func; -+ void *saved_scanner = ctx->scanner; -+ int ret; -+ -+ TRACE("name %s, hlsl %s.\n", debugstr_a(name), debugstr_a(hlsl)); -+ -+ /* The actual name of the function is mangled with a unique prefix, both to -+ * allow defining multiple variants of a function with the same name, and to -+ * avoid polluting the user name space. */ -+ -+ if (!(internal_name = hlsl_get_string_buffer(ctx))) -+ return NULL; -+ vkd3d_string_buffer_printf(internal_name, "<%s-%u>", name, ctx->internal_name_counter++); -+ -+ /* Save and restore everything that matters. -+ * Note that saving the scope stack is hard, and shouldn't be necessary. 
*/ -+ -+ ctx->scanner = NULL; -+ ctx->internal_func_name = internal_name->buffer; -+ ctx->cur_function = NULL; -+ ret = hlsl_lexer_compile(ctx, &code); -+ ctx->scanner = saved_scanner; -+ ctx->internal_func_name = saved_internal_func_name; -+ ctx->cur_function = saved_cur_function; -+ if (ret) -+ { -+ ERR("Failed to compile intrinsic, error %u.\n", ret); -+ hlsl_release_string_buffer(ctx, internal_name); -+ return NULL; -+ } -+ func = hlsl_get_func_decl(ctx, internal_name->buffer); -+ hlsl_release_string_buffer(ctx, internal_name); -+ return func; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index bce48e94b24..2cde5d58eba 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -257,7 +257,7 @@ struct hlsl_reg - /* Number of registers to be allocated. - * Unlike the variable's type's regsize, it is not expressed in register components, but rather - * in whole registers, and may depend on which components are used within the shader. */ -- uint32_t bind_count; -+ uint32_t allocation_size; - /* For numeric registers, a writemask can be provided to indicate the reservation of only some - * of the 4 components. 
*/ - unsigned int writemask; -@@ -337,7 +337,7 @@ struct hlsl_src - struct hlsl_attribute - { - const char *name; -- struct list instrs; -+ struct hlsl_block instrs; - struct vkd3d_shader_location loc; - unsigned int args_count; - struct hlsl_src args[]; -@@ -356,6 +356,7 @@ struct hlsl_attribute - #define HLSL_MODIFIER_COLUMN_MAJOR 0x00000400 - #define HLSL_STORAGE_IN 0x00000800 - #define HLSL_STORAGE_OUT 0x00001000 -+#define HLSL_MODIFIER_INLINE 0x00002000 - - #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ - HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ -@@ -417,11 +418,15 @@ struct hlsl_ir_var - enum hlsl_sampler_dim sampler_dim; - struct vkd3d_shader_location first_sampler_dim_loc; - } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; -+ /* Minimum number of binds required to include all object components actually used in the shader. -+ * It may be less than the allocation size, e.g. for texture arrays. */ -+ unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1]; - - uint32_t is_input_semantic : 1; - uint32_t is_output_semantic : 1; - uint32_t is_uniform : 1; - uint32_t is_param : 1; -+ uint32_t is_separated_resource : 1; - }; - - /* Sized array of variables representing a function's parameters. */ -@@ -502,7 +507,11 @@ enum hlsl_ir_expr_op - HLSL_OP1_COS, - HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ - HLSL_OP1_DSX, -+ HLSL_OP1_DSX_COARSE, -+ HLSL_OP1_DSX_FINE, - HLSL_OP1_DSY, -+ HLSL_OP1_DSY_COARSE, -+ HLSL_OP1_DSY_FINE, - HLSL_OP1_EXP2, - HLSL_OP1_FLOOR, - HLSL_OP1_FRACT, -@@ -541,8 +550,15 @@ enum hlsl_ir_expr_op - HLSL_OP2_NEQUAL, - HLSL_OP2_RSHIFT, - -+ /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, -+ * then adds c. */ - HLSL_OP3_DP2ADD, -- HLSL_OP3_LERP, -+ /* MOVC(a, b, c) returns c if a is bitwise zero and b otherwise. -+ * TERNARY(a, b, c) returns c if a == 0 and b otherwise. -+ * They differ for floating point numbers, because -+ * -0.0 == 0.0, but it is not bitwise zero. 
*/ -+ HLSL_OP3_MOVC, -+ HLSL_OP3_TERNARY, - }; - - #define HLSL_MAX_OPERANDS 3 -@@ -558,7 +574,8 @@ enum hlsl_ir_jump_type - { - HLSL_IR_JUMP_BREAK, - HLSL_IR_JUMP_CONTINUE, -- HLSL_IR_JUMP_DISCARD, -+ HLSL_IR_JUMP_DISCARD_NEG, -+ HLSL_IR_JUMP_DISCARD_NZ, - HLSL_IR_JUMP_RETURN, - }; - -@@ -566,6 +583,8 @@ struct hlsl_ir_jump - { - struct hlsl_ir_node node; - enum hlsl_ir_jump_type type; -+ /* Argument used for HLSL_IR_JUMP_DISCARD_NZ and HLSL_IR_JUMP_DISCARD_NEG. */ -+ struct hlsl_src condition; - }; - - struct hlsl_ir_swizzle -@@ -600,9 +619,11 @@ struct hlsl_deref - * components, within the pertaining regset), from the start of the variable, of the part - * referenced. - * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 -- -- * before writing the bytecode. */ -+ * before writing the bytecode. -+ * Since the type information cannot longer be retrieved from the offset alone, the type is -+ * stored in the data_type field. */ - struct hlsl_src offset; -- enum hlsl_regset offset_regset; -+ struct hlsl_type *data_type; - }; - - struct hlsl_ir_load -@@ -624,6 +645,8 @@ enum hlsl_resource_load_type - HLSL_RESOURCE_GATHER_GREEN, - HLSL_RESOURCE_GATHER_BLUE, - HLSL_RESOURCE_GATHER_ALPHA, -+ HLSL_RESOURCE_SAMPLE_INFO, -+ HLSL_RESOURCE_RESINFO, - }; - - struct hlsl_ir_resource_load -@@ -782,6 +805,9 @@ struct hlsl_ctx - /* Pointer to the current function; changes as the parser reads the code. */ - const struct hlsl_ir_function_decl *cur_function; - -+ /* Counter for generating unique internal variable names. */ -+ unsigned int internal_name_counter; -+ - /* Default matrix majority for matrix types. Can be set by a pragma within the HLSL source. */ - unsigned int matrix_majority; - -@@ -803,7 +829,11 @@ struct hlsl_ctx - * Only used for SM1 profiles. 
*/ - struct hlsl_constant_defs - { -- struct hlsl_vec4 *values; -+ struct hlsl_constant_register -+ { -+ uint32_t index; -+ struct hlsl_vec4 value; -+ } *regs; - size_t count, size; - } constant_defs; - /* Number of temp. registers required for the shader to run, i.e. the largest temp register -@@ -814,6 +844,12 @@ struct hlsl_ctx - * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ - uint32_t thread_count[3]; - -+ /* In some cases we generate opcodes by parsing an HLSL function and then -+ * invoking it. If not NULL, this field is the name of the function that we -+ * are currently parsing, "mangled" with an internal prefix to avoid -+ * polluting the user namespace. */ -+ const char *internal_func_name; -+ - /* Whether the parser is inside a state block (effects' metadata) inside a variable declaration. */ - uint32_t in_state_block : 1; - /* Whether the numthreads() attribute has been provided in the entry-point function. */ -@@ -1049,16 +1085,20 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) - } - } - -+char *hlsl_sprintf_alloc(struct hlsl_ctx *ctx, const char *fmt, ...) 
VKD3D_PRINTF_FUNC(2, 3); -+ - const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op); - const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type); - const char *debug_hlsl_writemask(unsigned int writemask); - const char *debug_hlsl_swizzle(unsigned int swizzle, unsigned int count); - - struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type); -+struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, -+ unsigned int index); - struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); - const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type); - --struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, -+struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false); - void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); - bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var); -@@ -1120,7 +1160,7 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond - struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, -- enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc); -+ enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); - - void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); - -@@ -1132,6 +1172,8 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls - const struct vkd3d_shader_location *loc); - struct 
hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc); - - struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); - struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, -@@ -1156,6 +1198,8 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned in - struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); - struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, - struct hlsl_type *type, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, -+ struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope); - struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, - unsigned int sample_count); - struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format); -@@ -1187,6 +1231,8 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type); - unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, enum hlsl_regset regset); - struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl_type *type, - unsigned int index); -+unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, -+ enum hlsl_regset regset, unsigned int index); - bool hlsl_type_is_row_major(const struct hlsl_type *type); - unsigned int hlsl_type_minor_size(const struct hlsl_type *type); - unsigned int hlsl_type_major_size(const struct 
hlsl_type *type); -@@ -1227,9 +1273,11 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun - bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, - const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); - bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); -+ bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); - int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); - -+struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); -+ - int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); - - #endif -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 0e07fe578e1..fb6d485ea69 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -53,7 +53,7 @@ struct parse_initializer - { - struct hlsl_ir_node **args; - unsigned int args_count; -- struct list *instrs; -+ struct hlsl_block *instrs; - bool braces; - }; - -@@ -73,6 +73,10 @@ struct parse_variable_def - struct hlsl_semantic semantic; - struct hlsl_reg_reservation reg_reservation; - struct parse_initializer initializer; -+ -+ struct hlsl_type *basic_type; -+ unsigned int modifiers; -+ struct vkd3d_shader_location modifiers_loc; - }; - - struct parse_function -@@ -85,8 +89,8 @@ struct parse_function - - struct parse_if_body - { -- struct list *then_block; -- struct list *else_block; -+ struct hlsl_block *then_block; -+ struct hlsl_block *else_block; - }; - - enum parse_assign_op -@@ -129,9 +133,18 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "%s", 
s); - } - --static struct hlsl_ir_node *node_from_list(struct list *list) -+static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) - { -- return LIST_ENTRY(list_tail(list), struct hlsl_ir_node, entry); -+ return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry); -+} -+ -+static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) -+{ -+ struct hlsl_block *block; -+ -+ if ((block = hlsl_alloc(ctx, sizeof(*block)))) -+ hlsl_block_init(block); -+ return block; - } - - static struct list *make_empty_list(struct hlsl_ctx *ctx) -@@ -143,10 +156,10 @@ static struct list *make_empty_list(struct hlsl_ctx *ctx) - return list; - } - --static void destroy_instr_list(struct list *list) -+static void destroy_block(struct hlsl_block *block) - { -- hlsl_free_instr_list(list); -- vkd3d_free(list); -+ hlsl_block_cleanup(block); -+ vkd3d_free(block); - } - - static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct hlsl_type *src, -@@ -273,10 +286,7 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ - return hlsl_types_are_componentwise_equal(ctx, src, dst); - } - --static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, -- unsigned int comp, const struct vkd3d_shader_location *loc); -- --static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) - { - struct hlsl_type *src_type = node->data_type; -@@ -313,7 +323,7 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - { - struct hlsl_ir_node *component_load; - struct hlsl_type *dst_comp_type; -- struct hlsl_block block; -+ struct hlsl_block store_block; - unsigned int src_idx; - - if (broadcast) -@@ -333,21 +343,21 @@ static struct hlsl_ir_node 
*add_cast(struct hlsl_ctx *ctx, struct list *instrs, - - dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); - -- if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) -+ if (!(component_load = hlsl_add_load_component(ctx, block, node, src_idx, loc))) - return NULL; - - if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) - return NULL; -- list_add_tail(instrs, &cast->entry); -+ hlsl_block_add_instr(block, cast); - -- if (!hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, cast)) -+ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast)) - return NULL; -- list_move_tail(instrs, &block.instrs); -+ hlsl_block_add_block(block, &store_block); - } - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - - return &load->node; - } -@@ -355,12 +365,12 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - { - if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) - return NULL; -- list_add_tail(instrs, &cast->entry); -+ hlsl_block_add_instr(block, cast); - return cast; - } - } - --static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) - { - struct hlsl_type *src_type = node->data_type; -@@ -386,7 +396,7 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct - hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", - src_type->class == HLSL_CLASS_VECTOR ? 
"vector" : "matrix"); - -- return add_cast(ctx, instrs, node, dst_type, loc); -+ return add_cast(ctx, block, node, dst_type, loc); - } - - static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, -@@ -405,29 +415,29 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, - return modifiers | mod; - } - --static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) -+static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) - { - struct hlsl_ir_node *condition, *not, *iff, *jump; - struct hlsl_block then_block; - - /* E.g. "for (i = 0; ; ++i)". */ -- if (list_empty(cond_list)) -+ if (list_empty(&cond_block->instrs)) - return true; - -- condition = node_from_list(cond_list); -+ condition = node_from_block(cond_block); - if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) - return false; -- list_add_tail(cond_list, ¬->entry); -+ hlsl_block_add_instr(cond_block, not); - - hlsl_block_init(&then_block); - -- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &condition->loc))) - return false; - hlsl_block_add_instr(&then_block, jump); - - if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) - return false; -- list_add_tail(cond_list, &iff->entry); -+ hlsl_block_add_instr(cond_block, iff); - return true; - } - -@@ -454,10 +464,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att - return false; - } - --static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct list *init, struct list *cond, -- struct list *iter, struct list *body, const struct vkd3d_shader_location *loc) -+static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, -+ const struct parse_attribute_list *attributes, struct hlsl_block *init, struct 
hlsl_block *cond, -+ struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) - { -- struct hlsl_block body_block; - struct hlsl_ir_node *loop; - unsigned int i; - -@@ -476,53 +486,49 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const - } - else - { -- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented.\n"); -+ hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); - } - } - else if (!strcmp(attr->name, "loop") - || !strcmp(attr->name, "fastopt") - || !strcmp(attr->name, "allow_uav_condition")) - { -- hlsl_fixme(ctx, loc, "Unhandled attribute %s.", attr->name); -+ hlsl_fixme(ctx, loc, "Unhandled attribute '%s'.", attr->name); - } - else - { -- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unrecognized attribute %s.", attr->name); -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, "Unrecognized attribute '%s'.", attr->name); - } - } - -- if (!init && !(init = make_empty_list(ctx))) -+ if (!init && !(init = make_empty_block(ctx))) - goto oom; - - if (!append_conditional_break(ctx, cond)) - goto oom; - -- hlsl_block_init(&body_block); -- -- if (type != LOOP_DO_WHILE) -- list_move_tail(&body_block.instrs, cond); -- -- list_move_tail(&body_block.instrs, body); -- - if (iter) -- list_move_tail(&body_block.instrs, iter); -+ hlsl_block_add_block(body, iter); - - if (type == LOOP_DO_WHILE) -- list_move_tail(&body_block.instrs, cond); -+ list_move_tail(&body->instrs, &cond->instrs); -+ else -+ list_move_head(&body->instrs, &cond->instrs); - -- if (!(loop = hlsl_new_loop(ctx, &body_block, loc))) -+ if (!(loop = hlsl_new_loop(ctx, body, loc))) - goto oom; -- list_add_tail(init, &loop->entry); -+ hlsl_block_add_instr(init, loop); - -- vkd3d_free(cond); -- vkd3d_free(body); -+ destroy_block(cond); -+ destroy_block(body); -+ destroy_block(iter); - return init; - - oom: -- 
destroy_instr_list(init); -- destroy_instr_list(cond); -- destroy_instr_list(iter); -- destroy_instr_list(body); -+ destroy_block(init); -+ destroy_block(cond); -+ destroy_block(iter); -+ destroy_block(body); - return NULL; - } - -@@ -539,7 +545,7 @@ static unsigned int initializer_size(const struct parse_initializer *initializer - - static void free_parse_initializer(struct parse_initializer *initializer) - { -- destroy_instr_list(initializer->instrs); -+ destroy_block(initializer->instrs); - vkd3d_free(initializer->args); - } - -@@ -625,7 +631,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod - return NULL; - } - --static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, -+static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) - { - struct hlsl_type *return_type = ctx->cur_function->return_type; -@@ -637,7 +643,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, - { - struct hlsl_ir_node *store; - -- if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, loc))) -+ if (!(return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc))) - return false; - - if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) -@@ -656,18 +662,18 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); - } - -- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, NULL, loc))) - return false; -- list_add_tail(instrs, &jump->entry); -+ hlsl_block_add_instr(block, jump); - - return true; - } - --static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, -- unsigned int comp, const struct vkd3d_shader_location 
*loc) -+struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *load, *store; -- struct hlsl_block block; -+ struct hlsl_block load_block; - struct hlsl_ir_var *var; - struct hlsl_deref src; - -@@ -676,17 +682,17 @@ static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list - - if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) - return NULL; -- list_add_tail(instrs, &store->entry); -+ hlsl_block_add_instr(block, store); - - hlsl_init_simple_deref_from_var(&src, var); -- if (!(load = hlsl_new_load_component(ctx, &block, &src, comp, loc))) -+ if (!(load = hlsl_new_load_component(ctx, &load_block, &src, comp, loc))) - return NULL; -- list_move_tail(instrs, &block.instrs); -+ hlsl_block_add_block(block, &load_block); - - return load; - } - --static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, -+static bool add_record_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *record, - unsigned int idx, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *index, *c; -@@ -695,20 +701,20 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct - - if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) - return false; -- list_add_tail(instrs, &c->entry); -+ hlsl_block_add_instr(block, c); - - if (!(index = hlsl_new_index(ctx, record, c, loc))) - return false; -- list_add_tail(instrs, &index->entry); -+ hlsl_block_add_instr(block, index); - - return true; - } - --static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc); - 
--static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, -+static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *array, - struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; -@@ -731,13 +737,13 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h - return false; - } - -- if (!(index = add_implicit_conversion(ctx, instrs, index, -+ if (!(index = add_implicit_conversion(ctx, block, index, - hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) - return false; - - if (!(return_index = hlsl_new_index(ctx, array, index, loc))) - return false; -- list_add_tail(instrs, &return_index->entry); -+ hlsl_block_add_instr(block, return_index); - - return true; - } -@@ -750,7 +756,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h - - if (!(cast = hlsl_new_cast(ctx, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &index->loc))) - return false; -- list_add_tail(instrs, &cast->entry); -+ hlsl_block_add_instr(block, cast); - index = cast; - - if (expr_type->class != HLSL_CLASS_ARRAY && expr_type->class != HLSL_CLASS_VECTOR && expr_type->class != HLSL_CLASS_MATRIX) -@@ -764,7 +770,7 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct h - - if (!(return_index = hlsl_new_index(ctx, array, index, loc))) - return false; -- list_add_tail(instrs, &return_index->entry); -+ hlsl_block_add_instr(block, return_index); - - return true; - } -@@ -830,6 +836,16 @@ static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) - return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; - } - -+static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -+{ -+ return ctx->profile->major_version > major || (ctx->profile->major_version == 
major && ctx->profile->minor_version >= minor); -+} -+ -+static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -+{ -+ return !shader_profile_version_ge(ctx, major, minor); -+} -+ - static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - struct hlsl_type *type, unsigned int modifiers, struct list *defs) - { -@@ -1020,7 +1036,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const - struct hlsl_reg_reservation reservation = {0}; - char *endptr; - -- if (ctx->profile->major_version < 4) -+ if (shader_profile_version_lt(ctx, 4, 0)) - return reservation; - - reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); -@@ -1079,17 +1095,17 @@ static struct hlsl_ir_function_decl *get_func_decl(struct rb_tree *funcs, - return NULL; - } - --static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) -+static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr) - { -- struct list *list; -+ struct hlsl_block *block; - -- if (!(list = make_empty_list(ctx))) -+ if (!(block = make_empty_block(ctx))) - { -- hlsl_free_instr(node); -+ hlsl_free_instr(instr); - return NULL; - } -- list_add_tail(list, &node->entry); -- return list; -+ hlsl_block_add_instr(block, instr); -+ return block; - } - - static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -1097,20 +1113,50 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - { - struct hlsl_ir_constant *constant; - struct hlsl_ir_node *node; -+ struct hlsl_block expr; - unsigned int ret = 0; - bool progress; - -- if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), -+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ switch (node->type) -+ { -+ case HLSL_IR_CONSTANT: -+ case HLSL_IR_EXPR: -+ case HLSL_IR_SWIZZLE: -+ case HLSL_IR_LOAD: -+ case 
HLSL_IR_INDEX: -+ continue; -+ case HLSL_IR_CALL: -+ case HLSL_IR_IF: -+ case HLSL_IR_LOOP: -+ case HLSL_IR_JUMP: -+ case HLSL_IR_RESOURCE_LOAD: -+ case HLSL_IR_RESOURCE_STORE: -+ case HLSL_IR_STORE: -+ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Expected literal expression."); -+ } -+ } -+ -+ if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) -+ return 0; -+ hlsl_block_add_block(&expr, block); -+ -+ if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), - hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) -+ { -+ hlsl_block_cleanup(&expr); - return 0; -+ } - - do - { -- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); -- progress |= hlsl_copy_propagation_execute(ctx, block); -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, &expr); - } while (progress); - -- node = node_from_list(&block->instrs); -+ node = node_from_block(&expr); - if (node->type == HLSL_IR_CONSTANT) - { - constant = hlsl_ir_constant(node); -@@ -1119,9 +1165,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - else - { - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "Failed to evaluate constant expression %d.", node->type); -+ "Failed to evaluate constant expression."); - } - -+ hlsl_block_cleanup(&expr); -+ - return ret; - } - -@@ -1253,7 +1301,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct - return true; - } - --static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], - struct hlsl_type *type, const struct vkd3d_shader_location *loc) - { -@@ -1277,38 +1325,38 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, - for (i = 0; i < 
type->dimy * type->dimx; ++i) - { - struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; -- struct hlsl_block block; -+ struct hlsl_block store_block; - unsigned int j; - - for (j = 0; j < HLSL_MAX_OPERANDS; j++) - { - if (operands[j]) - { -- if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, block, operands[j], i, loc))) - return NULL; - - cell_operands[j] = load; - } - } - -- if (!(value = add_expr(ctx, instrs, op, cell_operands, scalar_type, loc))) -+ if (!(value = add_expr(ctx, block, op, cell_operands, scalar_type, loc))) - return NULL; - -- if (!hlsl_new_store_component(ctx, &block, &var_deref, i, value)) -+ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, value)) - return NULL; -- list_move_tail(instrs, &block.instrs); -+ hlsl_block_add_block(block, &store_block); - } - - if (!(var_load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -- list_add_tail(instrs, &var_load->node.entry); -+ hlsl_block_add_instr(block, &var_load->node); - - return &var_load->node; - } - - if (!(expr = hlsl_new_expr(ctx, op, operands, type, loc))) - return NULL; -- list_add_tail(instrs, &expr->entry); -+ hlsl_block_add_instr(block, expr); - - return expr; - } -@@ -1334,23 +1382,23 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * - } - } - --static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg}; - -- return add_expr(ctx, instrs, op, args, arg->data_type, loc); -+ return add_expr(ctx, block, op, args, arg->data_type, loc); - } - --static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node 
*add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) - { - check_integer_type(ctx, arg); - -- return add_unary_arithmetic_expr(ctx, instrs, op, arg, loc); -+ return add_unary_arithmetic_expr(ctx, block, op, arg, loc); - } - --static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; -@@ -1359,10 +1407,10 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct - bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, - arg->data_type->dimx, arg->data_type->dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, bool_type, loc); -+ return add_expr(ctx, block, op, args, bool_type, loc); - } - - static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, -@@ -1378,7 +1426,7 @@ static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const str - return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - } - --static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -@@ -1387,49 +1435,26 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str - - common_type = get_common_numeric_type(ctx, arg1, arg2, loc); - 
-- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, common_type, loc); --} -- --static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, loc); -- return list1; -+ return add_expr(ctx, block, op, args, common_type, loc); - } - --static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { - check_integer_type(ctx, arg1); - check_integer_type(ctx, arg2); - -- return add_binary_arithmetic_expr(ctx, instrs, op, arg1, arg2, loc); -+ return add_binary_arithmetic_expr(ctx, block, op, arg1, arg2, loc); - } - --static struct list *add_binary_bitwise_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_bitwise_expr(ctx, list1, op, arg1, arg2, loc); -- -- return list1; --} -- --static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node 
*add_binary_comparison_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -@@ -1445,27 +1470,16 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str - common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, return_type, loc); -+ return add_expr(ctx, block, op, args, return_type, loc); - } - --static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_comparison_expr(ctx, list1, op, arg1, arg2, loc); -- return list1; --} -- --static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -@@ -1479,28 +1493,16 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct - - common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) -+ if (!(args[0] = 
add_implicit_conversion(ctx, block, arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, common_type, loc); -+ return add_expr(ctx, block, op, args, common_type, loc); - } - --static struct list *add_binary_logical_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_logical_expr(ctx, list1, op, arg1, arg2, loc); -- -- return list1; --} -- --static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -@@ -1522,28 +1524,16 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l - return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, return_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, return_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, integer_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, integer_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, return_type, loc); -+ return add_expr(ctx, block, op, args, return_type, loc); - } - --static struct list *add_binary_shift_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum 
hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_shift_expr(ctx, list1, op, arg1, arg2, loc); -- -- return list1; --} -- --static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) - { - enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); -@@ -1557,8 +1547,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, arg1->data_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid type %s.\n", string->buffer); -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return NULL; - } -@@ -1568,8 +1557,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, arg2->data_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid type %s.\n", string->buffer); -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return NULL; - } -@@ -1598,6 +1586,53 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis - return add_expr(ctx, instrs, op, args, ret_type, loc); - } - -+static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, -+ struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -+{ 
-+ struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); -+ -+ hlsl_block_add_block(block1, block2); -+ destroy_block(block2); -+ -+ switch (op) -+ { -+ case HLSL_OP2_ADD: -+ case HLSL_OP2_DIV: -+ case HLSL_OP2_MOD: -+ case HLSL_OP2_MUL: -+ add_binary_arithmetic_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ case HLSL_OP2_BIT_AND: -+ case HLSL_OP2_BIT_OR: -+ case HLSL_OP2_BIT_XOR: -+ add_binary_bitwise_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ case HLSL_OP2_LESS: -+ case HLSL_OP2_GEQUAL: -+ case HLSL_OP2_EQUAL: -+ case HLSL_OP2_NEQUAL: -+ add_binary_comparison_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ case HLSL_OP2_LOGIC_AND: -+ case HLSL_OP2_LOGIC_OR: -+ add_binary_logical_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ case HLSL_OP2_LSHIFT: -+ case HLSL_OP2_RSHIFT: -+ add_binary_shift_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ return block1; -+} -+ - static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) - { - static const enum hlsl_ir_expr_op ops[] = -@@ -1654,7 +1689,7 @@ static bool invert_swizzle(unsigned int *swizzle, unsigned int *writemask, unsig - return true; - } - --static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *lhs, -+static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, - enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) - { - struct hlsl_type *lhs_type = lhs->data_type; -@@ -1663,7 +1698,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - - if (assign_op == ASSIGN_OP_SUB) - { -- if (!(rhs = add_unary_arithmetic_expr(ctx, instrs, HLSL_OP1_NEG, rhs, &rhs->loc))) -+ if (!(rhs = add_unary_arithmetic_expr(ctx, block, HLSL_OP1_NEG, rhs, &rhs->loc))) - return NULL; - assign_op = ASSIGN_OP_ADD; - } -@@ -1672,14 +1707,14 @@ static struct 
hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - enum hlsl_ir_expr_op op = op_from_assignment(assign_op); - - assert(op); -- if (!(rhs = add_binary_arithmetic_expr(ctx, instrs, op, lhs, rhs, &rhs->loc))) -+ if (!(rhs = add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, &rhs->loc))) - return NULL; - } - - if (lhs_type->class <= HLSL_CLASS_LAST_NUMERIC) - writemask = (1 << lhs_type->dimx) - 1; - -- if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) -+ if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) - return NULL; - - while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) -@@ -1708,7 +1743,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - { - return NULL; - } -- list_add_tail(instrs, &new_swizzle->entry); -+ hlsl_block_add_instr(block, new_swizzle); - - lhs = swizzle->val.node; - rhs = new_swizzle; -@@ -1754,7 +1789,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - hlsl_cleanup_deref(&resource_deref); - return NULL; - } -- list_add_tail(instrs, &store->entry); -+ hlsl_block_add_instr(block, store); - hlsl_cleanup_deref(&resource_deref); - } - else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) -@@ -1773,13 +1808,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - - if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) - return NULL; -- list_add_tail(instrs, &c->entry); -+ hlsl_block_add_instr(block, c); - - if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) - return NULL; -- list_add_tail(instrs, &cell->entry); -+ hlsl_block_add_instr(block, cell); - -- if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) -+ if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) - return NULL; - - if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) -@@ -1790,7 +1825,7 @@ static struct hlsl_ir_node 
*add_assignment(struct hlsl_ctx *ctx, struct list *in - hlsl_cleanup_deref(&deref); - return NULL; - } -- list_add_tail(instrs, &store->entry); -+ hlsl_block_add_instr(block, store); - hlsl_cleanup_deref(&deref); - } - } -@@ -1807,7 +1842,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - hlsl_cleanup_deref(&deref); - return NULL; - } -- list_add_tail(instrs, &store->entry); -+ hlsl_block_add_instr(block, store); - hlsl_cleanup_deref(&deref); - } - -@@ -1816,14 +1851,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - * the last instruction in the list, we do need to copy. */ - if (!(copy = hlsl_new_copy(ctx, rhs))) - return NULL; -- list_add_tail(instrs, ©->entry); -+ hlsl_block_add_instr(block, copy); - return copy; - } - --static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, -+static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool decrement, bool post, - const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *lhs = node_from_list(instrs); -+ struct hlsl_ir_node *lhs = node_from_block(block); - struct hlsl_ir_node *one; - - if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) -@@ -1832,9 +1867,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem - - if (!(one = hlsl_new_int_constant(ctx, 1, loc))) - return false; -- list_add_tail(instrs, &one->entry); -+ hlsl_block_add_instr(block, one); - -- if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) -+ if (!add_assignment(ctx, block, lhs, decrement ? 
ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) - return false; - - if (post) -@@ -1843,7 +1878,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem - - if (!(copy = hlsl_new_copy(ctx, lhs))) - return false; -- list_add_tail(instrs, ©->entry); -+ hlsl_block_add_instr(block, copy); - - /* Post increment/decrement expressions are considered const. */ - if (!(copy->data_type = hlsl_type_clone(ctx, copy->data_type, 0, HLSL_MODIFIER_CONST))) -@@ -1853,7 +1888,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem - return true; - } - --static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, -+static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) - { - unsigned int src_comp_count = hlsl_type_component_count(src->data_type); -@@ -1868,7 +1903,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_type *dst_comp_type; - struct hlsl_block block; - -- if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) -+ if (!(load = hlsl_add_load_component(ctx, instrs, src, k, &src->loc))) - return; - - dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); -@@ -1878,7 +1913,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, - - if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) - return; -- list_move_tail(instrs, &block.instrs); -+ hlsl_block_add_block(instrs, &block); - - ++*store_index; - } -@@ -1924,211 +1959,231 @@ static bool type_has_numeric_components(struct hlsl_type *type) - return false; - } - --static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, -- unsigned int modifiers, const struct vkd3d_shader_location *modifiers_loc, struct list *var_list) -+static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, 
unsigned int modifiers, -+ const struct vkd3d_shader_location *loc) - { -- struct parse_variable_def *v, *v_next; -+ modifiers &= (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); -+ if (modifiers) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_modifiers_to_string(ctx, modifiers))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ } -+} -+ -+static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) -+{ -+ struct hlsl_type *basic_type = v->basic_type; - struct hlsl_ir_function_decl *func; -- unsigned int invalid_modifiers; -- struct list *statements_list; -+ struct hlsl_semantic new_semantic; -+ uint32_t modifiers = v->modifiers; -+ bool unbounded_res_array = false; - struct hlsl_ir_var *var; - struct hlsl_type *type; - bool local = true; -+ char *var_name; -+ unsigned int i; -+ -+ assert(basic_type); - - if (basic_type->class == HLSL_CLASS_MATRIX) - assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - -- if (!(statements_list = make_empty_list(ctx))) -- { -- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) -- free_parse_variable_def(v); -- vkd3d_free(var_list); -- return NULL; -- } -+ type = basic_type; - -- if (!var_list) -- return statements_list; -- -- invalid_modifiers = modifiers & (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); -- if (invalid_modifiers) -+ if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) - { -- struct vkd3d_string_buffer *string; -- -- if ((string = hlsl_modifiers_to_string(ctx, invalid_modifiers))) -- hlsl_error(ctx, modifiers_loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); -- hlsl_release_string_buffer(ctx, string); -+ for (i = 0; i < v->arrays.count; ++i) -+ unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); - } - 
-- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) -+ if (unbounded_res_array) - { -- bool unbounded_res_array = false; -- unsigned int i; -- -- type = basic_type; -- -- if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) -+ if (v->arrays.count == 1) - { -- for (i = 0; i < v->arrays.count; ++i) -- unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -+ hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); -+ return; - } -- -- if (unbounded_res_array) -+ else - { -- if (v->arrays.count == 1) -- { -- hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); -- free_parse_variable_def(v); -- continue; -- } -- else -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Unbounded resource arrays cannot be multi-dimensional."); -- } -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Unbounded resource arrays cannot be multi-dimensional."); - } -- else -+ } -+ else -+ { -+ for (i = 0; i < v->arrays.count; ++i) - { -- for (i = 0; i < v->arrays.count; ++i) -+ if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) - { -- if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) -- { -- unsigned int size = initializer_size(&v->initializer); -- unsigned int elem_components = hlsl_type_component_count(type); -- -- if (i < v->arrays.count - 1) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Only innermost array size can be implicit."); -- free_parse_initializer(&v->initializer); -- v->initializer.args_count = 0; -- } -- else if (elem_components == 0) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Cannot declare an implicit size array of a size 0 type."); -- free_parse_initializer(&v->initializer); -- v->initializer.args_count = 0; -- } -- else if (size == 0) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Implicit size arrays need to be initialized."); -- 
free_parse_initializer(&v->initializer); -- v->initializer.args_count = 0; -+ unsigned int size = initializer_size(&v->initializer); -+ unsigned int elem_components = hlsl_type_component_count(type); - -- } -- else if (size % elem_components != 0) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Cannot initialize implicit size array with %u components, expected a multiple of %u.", -- size, elem_components); -- free_parse_initializer(&v->initializer); -- v->initializer.args_count = 0; -- } -- else -- { -- v->arrays.sizes[i] = size / elem_components; -- } -+ if (i < v->arrays.count - 1) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Only innermost array size can be implicit."); -+ v->initializer.args_count = 0; -+ } -+ else if (elem_components == 0) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Cannot declare an implicit size array of a size 0 type."); -+ v->initializer.args_count = 0; -+ } -+ else if (size == 0) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Implicit size arrays need to be initialized."); -+ v->initializer.args_count = 0; -+ } -+ else if (size % elem_components != 0) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Cannot initialize implicit size array with %u components, expected a multiple of %u.", -+ size, elem_components); -+ v->initializer.args_count = 0; -+ } -+ else -+ { -+ v->arrays.sizes[i] = size / elem_components; - } -- type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); - } -+ type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); - } -- vkd3d_free(v->arrays.sizes); -+ } -+ -+ if (!(var_name = vkd3d_strdup(v->name))) -+ return; - -- if (!(var = hlsl_new_var(ctx, v->name, type, &v->loc, &v->semantic, modifiers, &v->reg_reservation))) -+ new_semantic = v->semantic; -+ if (v->semantic.name) -+ { -+ if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) - { -- 
free_parse_variable_def(v); -- continue; -+ vkd3d_free(var_name); -+ return; - } -+ } - -- var->buffer = ctx->cur_buffer; -+ if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) -+ { -+ hlsl_cleanup_semantic(&new_semantic); -+ vkd3d_free(var_name); -+ return; -+ } - -- if (var->buffer == ctx->globals_buffer) -- { -- if (var->reg_reservation.offset_type) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -- "packoffset() is only allowed inside constant buffer declarations."); -- } -+ var->buffer = ctx->cur_buffer; - -- if (ctx->cur_scope == ctx->globals) -- { -- local = false; -+ if (var->buffer == ctx->globals_buffer) -+ { -+ if (var->reg_reservation.offset_type) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "packoffset() is only allowed inside constant buffer declarations."); -+ } - -- if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); -+ if (ctx->cur_scope == ctx->globals) -+ { -+ local = false; - -- /* Mark it as uniform. We need to do this here since synthetic -- * variables also get put in the global scope, but shouldn't be -- * considered uniforms, and we have no way of telling otherwise. 
*/ -- if (!(modifiers & HLSL_STORAGE_STATIC)) -- var->storage_modifiers |= HLSL_STORAGE_UNIFORM; -+ if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); - -- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && -- type_has_object_components(var->data_type, true)) -- { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Target profile doesn't support objects as struct members in uniform variables.\n"); -- } -+ /* Mark it as uniform. We need to do this here since synthetic -+ * variables also get put in the global scope, but shouldn't be -+ * considered uniforms, and we have no way of telling otherwise. */ -+ if (!(modifiers & HLSL_STORAGE_STATIC)) -+ var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - -- if ((func = hlsl_get_func_decl(ctx, var->name))) -- { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, -- "'%s' is already defined as a function.", var->name); -- hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, -- "'%s' was previously defined here.", var->name); -- } -- } -- else -+ if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && -+ type_has_object_components(var->data_type, true)) - { -- static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED -- | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; -- -- if (modifiers & invalid) -- { -- struct vkd3d_string_buffer *string; -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Target profile doesn't support objects as struct members in uniform variables."); -+ } - -- if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Modifiers '%s' are not allowed on local variables.", string->buffer); -- 
hlsl_release_string_buffer(ctx, string); -- } -- if (var->semantic.name) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, -- "Semantics are not allowed on local variables."); -+ if ((func = hlsl_get_func_decl(ctx, var->name))) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, -+ "'%s' is already defined as a function.", var->name); -+ hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, -+ "'%s' was previously defined here.", var->name); - } -+ } -+ else -+ { -+ static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED -+ | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; - -- if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) -- && type_has_object_components(var->data_type, false)) -+ if (modifiers & invalid) - { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Static variables cannot have both numeric and resource components."); -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Modifiers '%s' are not allowed on local variables.", string->buffer); -+ hlsl_release_string_buffer(ctx, string); - } -+ if (var->semantic.name) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, -+ "Semantics are not allowed on local variables."); - -- if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count -- && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) -+ if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count && !(modifiers & HLSL_STORAGE_STATIC)) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, -- "Const variable \"%s\" is missing an initializer.", var->name); -- hlsl_free_var(var); -- free_parse_initializer(&v->initializer); -- vkd3d_free(v); -- continue; -+ "Const variable \"%s\" is missing an initializer.", 
var->name); - } -+ } - -- if (!hlsl_add_var(ctx, var, local)) -+ if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) -+ && type_has_object_components(var->data_type, false)) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Static variables cannot have both numeric and resource components."); -+ } -+ -+ if (!hlsl_add_var(ctx, var, local)) -+ { -+ struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); -+ -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, -+ "Variable \"%s\" was already declared in this scope.", var->name); -+ hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); -+ hlsl_free_var(var); -+ return; -+ } -+} -+ -+static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) -+{ -+ struct parse_variable_def *v, *v_next; -+ struct hlsl_block *initializers; -+ struct hlsl_ir_var *var; -+ struct hlsl_type *type; -+ -+ if (!(initializers = make_empty_block(ctx))) -+ { -+ LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) - { -- struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); -+ free_parse_variable_def(v); -+ } -+ vkd3d_free(var_list); -+ return NULL; -+ } - -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, -- "Variable \"%s\" was already declared in this scope.", var->name); -- hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); -- hlsl_free_var(var); -- free_parse_initializer(&v->initializer); -- vkd3d_free(v); -+ LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) -+ { -+ /* If this fails, the variable failed to be declared. 
*/ -+ if (!(var = hlsl_get_var(ctx->cur_scope, v->name))) -+ { -+ free_parse_variable_def(v); - continue; - } -+ type = var->data_type; - - if (v->initializer.args_count) - { -@@ -2143,8 +2198,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Expected %u components in initializer, but got %u.", - hlsl_type_component_count(type), size); -- free_parse_initializer(&v->initializer); -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - -@@ -2159,16 +2213,14 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); - - assert(v->initializer.args_count == 1); -- list_add_tail(v->initializer.instrs, &load->node.entry); -+ hlsl_block_add_instr(v->initializer.instrs, &load->node); - add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); - } - -- if (modifiers & HLSL_STORAGE_STATIC) -- list_move_tail(&ctx->static_initializers.instrs, v->initializer.instrs); -+ if (var->storage_modifiers & HLSL_STORAGE_STATIC) -+ hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); - else -- list_move_tail(statements_list, v->initializer.instrs); -- vkd3d_free(v->initializer.args); -- vkd3d_free(v->initializer.instrs); -+ hlsl_block_add_block(initializers, v->initializer.instrs); - } - else if (var->storage_modifiers & HLSL_STORAGE_STATIC) - { -@@ -2178,34 +2230,35 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - - if (type_has_object_components(var->data_type, false)) - { -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - - if (!(zero = hlsl_new_uint_constant(ctx, 0, &var->loc))) - { -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - hlsl_block_add_instr(&ctx->static_initializers, zero); - -- if (!(cast = add_cast(ctx, 
&ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) -+ if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) - { -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - - if (!(store = hlsl_new_simple_store(ctx, var, cast))) - { -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - hlsl_block_add_instr(&ctx->static_initializers, store); - } -- vkd3d_free(v); -+ free_parse_variable_def(v); - } -+ - vkd3d_free(var_list); -- return statements_list; -+ return initializers; - } - - struct find_function_call_args -@@ -2277,6 +2330,92 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, - return args.decl; - } - -+static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ -+ return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); -+} -+ -+static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -+ const struct parse_initializer *args, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *call; -+ unsigned int i; -+ -+ assert(args->args_count == func->parameters.count); -+ -+ for (i = 0; i < func->parameters.count; ++i) -+ { -+ struct hlsl_ir_var *param = func->parameters.vars[i]; -+ struct hlsl_ir_node *arg = args->args[i]; -+ -+ if (!hlsl_types_are_equal(arg->data_type, param->data_type)) -+ { -+ struct hlsl_ir_node *cast; -+ -+ if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) -+ return false; -+ args->args[i] = cast; -+ arg = cast; -+ } -+ -+ if (param->storage_modifiers & HLSL_STORAGE_IN) -+ { -+ struct hlsl_ir_node *store; -+ -+ if (!(store = hlsl_new_simple_store(ctx, param, arg))) -+ return false; -+ hlsl_block_add_instr(args->instrs, store); -+ } -+ } -+ -+ if (!(call = hlsl_new_call(ctx, func, loc))) -+ return false; -+ 
hlsl_block_add_instr(args->instrs, call); -+ -+ for (i = 0; i < func->parameters.count; ++i) -+ { -+ struct hlsl_ir_var *param = func->parameters.vars[i]; -+ struct hlsl_ir_node *arg = args->args[i]; -+ -+ if (param->storage_modifiers & HLSL_STORAGE_OUT) -+ { -+ struct hlsl_ir_load *load; -+ -+ if (arg->data_type->modifiers & HLSL_MODIFIER_CONST) -+ hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, -+ "Output argument to \"%s\" is const.", func->func->name); -+ -+ if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) -+ return false; -+ hlsl_block_add_instr(args->instrs, &load->node); -+ -+ if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) -+ return false; -+ } -+ } -+ -+ if (func->return_var) -+ { -+ struct hlsl_ir_load *load; -+ -+ if (!(load = hlsl_new_var_load(ctx, func->return_var, loc))) -+ return false; -+ hlsl_block_add_instr(args->instrs, &load->node); -+ } -+ else -+ { -+ struct hlsl_ir_node *expr; -+ -+ if (!(expr = hlsl_new_void_expr(ctx, loc))) -+ return false; -+ hlsl_block_add_instr(args->instrs, expr); -+ } -+ -+ return true; -+} -+ - static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, - const struct parse_initializer *params, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) - { -@@ -2394,18 +2533,18 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, - - if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) - return false; -- list_add_tail(params->instrs, &one->entry); -+ hlsl_block_add_instr(params->instrs, one); - - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) - return false; -- list_add_tail(params->instrs, &zero->entry); -+ hlsl_block_add_instr(params->instrs, zero); - - mul = one; - - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) - { -- if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) - return false; - - if (!(mul = 
add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) -@@ -2431,7 +2570,7 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, - { - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) - return false; -- list_add_tail(params->instrs, &zero->entry); -+ hlsl_block_add_instr(params->instrs, zero); - - if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) - return false; -@@ -2442,14 +2581,14 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, - { - if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) - return false; -- list_add_tail(params->instrs, &bfalse->entry); -+ hlsl_block_add_instr(params->instrs, bfalse); - - or = bfalse; - - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) - { -- if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) - return false; - - if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) -@@ -2544,6 +2683,34 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, max, params->args[2], loc); - } - -+static bool intrinsic_clip(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *condition, *jump; -+ -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) -+ return false; -+ -+ condition = params->args[0]; -+ -+ if (ctx->profile->major_version < 4 && hlsl_type_component_count(condition->data_type) > 4) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, condition->data_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Argument type cannot exceed 4 components, got type \"%s\".", string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return false; -+ } -+ -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NEG, 
condition, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, jump); -+ -+ return true; -+} -+ - static bool intrinsic_cos(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -2579,26 +2746,26 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, - - if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) - return false; -- list_add_tail(params->instrs, &arg1_swzl1->entry); -+ hlsl_block_add_instr(params->instrs, arg1_swzl1); - - if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) - return false; -- list_add_tail(params->instrs, &arg2_swzl1->entry); -+ hlsl_block_add_instr(params->instrs, arg2_swzl1); - - if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) - return false; - - if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, loc))) - return false; -- list_add_tail(params->instrs, &mul1_neg->entry); -+ hlsl_block_add_instr(params->instrs, mul1_neg); - - if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) - return false; -- list_add_tail(params->instrs, &arg1_swzl2->entry); -+ hlsl_block_add_instr(params->instrs, arg1_swzl2); - - if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) - return false; -- list_add_tail(params->instrs, &arg2_swzl2->entry); -+ hlsl_block_add_instr(params->instrs, arg2_swzl2); - - if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) - return false; -@@ -2617,6 +2784,28 @@ static bool intrinsic_ddx(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); - } - -+static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = 
intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); -+} -+ -+static bool intrinsic_ddx_fine(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_FINE, arg, loc); -+} -+ - static bool intrinsic_ddy(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -2628,6 +2817,28 @@ static bool intrinsic_ddy(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); - } - -+static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); -+} -+ -+static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); -+} -+ - static bool intrinsic_distance(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -2668,7 +2879,7 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, - /* 1/ln(2) */ - if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) - return false; -- list_add_tail(params->instrs, &coeff->entry); -+ hlsl_block_add_instr(params->instrs, 
coeff); - - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) - return false; -@@ -2702,6 +2913,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer - const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *x, *y, *div, *abs, *frac, *neg_frac, *ge, *select, *zero; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; - static const struct hlsl_constant_value zero_value; - - if (!(x = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -@@ -2715,7 +2927,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer - - if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) - return false; -- list_add_tail(params->instrs, &zero->entry); -+ hlsl_block_add_instr(params->instrs, zero); - - if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) - return false; -@@ -2729,7 +2941,10 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer - if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, div, zero, loc))) - return false; - -- if (!(select = hlsl_add_conditional(ctx, params->instrs, ge, frac, neg_frac))) -+ operands[0] = ge; -+ operands[1] = frac; -+ operands[2] = neg_frac; -+ if (!(select = add_expr(ctx, params->instrs, HLSL_OP3_TERNARY, operands, x->data_type, loc))) - return false; - - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, select, y, loc); -@@ -2806,7 +3021,7 @@ static bool intrinsic_lerp(struct hlsl_ctx *ctx, - } - - static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, -- struct list *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, -+ struct hlsl_block *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *log, *mul; -@@ -2823,14 +3038,17 @@ static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx 
*ctx, - static bool intrinsic_lit(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *n_l_neg, *n_h_neg, *specular_or, *specular_pow, *load; -- struct hlsl_ir_node *n_l, *n_h, *m, *diffuse, *zero, *store, *init; -- struct hlsl_constant_value init_value; -- struct hlsl_ir_load *var_load; -- struct hlsl_deref var_deref; -- struct hlsl_type *ret_type; -- struct hlsl_ir_var *var; -- struct hlsl_block block; -+ struct hlsl_ir_function_decl *func; -+ -+ static const char body[] = -+ "float4 lit(float n_l, float n_h, float m)\n" -+ "{\n" -+ " float4 ret;\n" -+ " ret.xw = 1.0;\n" -+ " ret.y = max(n_l, 0);\n" -+ " ret.z = (n_l < 0 || n_h < 0) ? 0 : pow(n_h, m);\n" -+ " return ret;\n" -+ "}"; - - if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR - || params->args[1]->data_type->class != HLSL_CLASS_SCALAR -@@ -2840,70 +3058,10 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, - return false; - } - -- if (!(n_l = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -- -- if (!(n_h = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) -- return false; -- -- if (!(m = intrinsic_float_convert_arg(ctx, params, params->args[2], loc))) -- return false; -- -- ret_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); -- -- if (!(var = hlsl_new_synthetic_var(ctx, "lit", ret_type, loc))) -- return false; -- hlsl_init_simple_deref_from_var(&var_deref, var); -- -- init_value.u[0].f = 1.0f; -- init_value.u[1].f = 0.0f; -- init_value.u[2].f = 0.0f; -- init_value.u[3].f = 1.0f; -- if (!(init = hlsl_new_constant(ctx, ret_type, &init_value, loc))) -- return false; -- list_add_tail(params->instrs, &init->entry); -- -- if (!(store = hlsl_new_simple_store(ctx, var, init))) -- return false; -- list_add_tail(params->instrs, &store->entry); -- -- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) -- return false; -- list_add_tail(params->instrs, &zero->entry); -- 
-- /* Diffuse component. */ -- if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) -- return false; -- -- if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) -- return false; -- list_move_tail(params->instrs, &block.instrs); -- -- /* Specular component. */ -- if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_h, zero, loc))) -- return false; -- -- if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_l, zero, loc))) -- return false; -- -- if (!(specular_or = add_binary_logical_expr(ctx, params->instrs, HLSL_OP2_LOGIC_OR, n_l_neg, n_h_neg, loc))) -- return false; -- -- if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) -- return false; -- -- if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, zero, specular_pow))) -- return false; -- -- if (!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) -- return false; -- list_move_tail(params->instrs, &block.instrs); -- -- if (!(var_load = hlsl_new_var_load(ctx, var, loc))) -+ if (!(func = hlsl_compile_internal_function(ctx, "lit", body))) - return false; -- list_add_tail(params->instrs, &var_load->node.entry); - -- return true; -+ return add_user_call(ctx, func, params, loc); - } - - static bool intrinsic_log(struct hlsl_ctx *ctx, -@@ -3034,10 +3192,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - { - struct hlsl_ir_node *value1, *value2, *mul; - -- if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) -+ if (!(value1 = hlsl_add_load_component(ctx, params->instrs, -+ cast1, j * cast1->data_type->dimx + k, loc))) - return false; - -- if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) -+ if (!(value2 = hlsl_add_load_component(ctx, params->instrs, -+ cast2, k * cast2->data_type->dimx + i, loc))) - return false; - - if (!(mul = add_binary_arithmetic_expr(ctx, 
params->instrs, HLSL_OP2_MUL, value1, value2, loc))) -@@ -3056,13 +3216,13 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - - if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) - return false; -- list_move_tail(params->instrs, &block.instrs); -+ hlsl_block_add_block(params->instrs, &block); - } - } - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return false; -- list_add_tail(params->instrs, &load->node.entry); -+ hlsl_block_add_instr(params->instrs, &load->node); - - return !!add_implicit_conversion(ctx, params->instrs, &load->node, ret_type, loc); - } -@@ -3169,7 +3329,7 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, - - if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) - return false; -- list_add_tail(params->instrs, &zero->entry); -+ hlsl_block_add_instr(params->instrs, zero); - - /* Check if 0 < arg, cast bool to int */ - -@@ -3205,62 +3365,33 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); - } - --/* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ --static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, -- const struct parse_initializer *params, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res, *one, *minus_two, *three; -- -- if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) -- return false; -- -- min_arg = params->args[0]; -- max_arg = params->args[1]; -- x_arg = params->args[2]; -- -- if (!(min_arg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, min_arg, loc))) -- return false; -- -- if (!(p_num = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, x_arg, min_arg, loc))) -- return false; -- -- if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, max_arg, min_arg, loc))) -- return false; -- -- if 
(!(one = hlsl_new_float_constant(ctx, 1.0, loc))) -- return false; -- list_add_tail(params->instrs, &one->entry); -- -- if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) -- return false; -- -- if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p_num, p_denom, loc))) -- return false; -- -- if (!(p = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, p, loc))) -- return false; -- -- if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) -- return false; -- list_add_tail(params->instrs, &minus_two->entry); -- -- if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) -- return false; -- list_add_tail(params->instrs, &three->entry); -+/* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ -+static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_type *type; -+ char *body; - -- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) -- return false; -+ static const char template[] = -+ "%s smoothstep(%s low, %s high, %s x)\n" -+ "{\n" -+ " %s p = saturate((x - low) / (high - low));\n" -+ " return (p * p) * (3 - 2 * p);\n" -+ "}"; - -- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, three, res, loc))) -+ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) - return false; -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); - -- if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, p, loc))) -+ if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name, type->name))) - return false; -- -- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, res, loc))) -+ func = hlsl_compile_internal_function(ctx, "smoothstep", body); -+ 
vkd3d_free(body); -+ if (!func) - return false; - -- return true; -+ return add_user_call(ctx, func, params, loc); - } - - static bool intrinsic_sqrt(struct hlsl_ctx *ctx, -@@ -3308,7 +3439,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - - if (params->args_count == 4) - { -- hlsl_fixme(ctx, loc, "Samples with gradients are not implemented.\n"); -+ hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); - } - - sampler_type = params->args[0]->data_type; -@@ -3326,7 +3457,42 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - - if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) -- coords = params->args[1]; -+ { -+ return false; -+ } -+ -+ /* tex1D() functions never produce 1D resource declarations. For newer profiles half offset -+ is used for the second coordinate, while older ones appear to replicate first coordinate.*/ -+ if (dim == HLSL_SAMPLER_DIM_1D) -+ { -+ struct hlsl_ir_load *load; -+ struct hlsl_ir_node *half; -+ struct hlsl_ir_var *var; -+ unsigned int idx = 0; -+ -+ if (!(var = hlsl_new_synthetic_var(ctx, "coords", hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 2), loc))) -+ return false; -+ -+ initialize_var_components(ctx, params->instrs, var, &idx, coords); -+ if (shader_profile_version_ge(ctx, 4, 0)) -+ { -+ if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, half); -+ -+ initialize_var_components(ctx, params->instrs, var, &idx, half); -+ } -+ else -+ initialize_var_components(ctx, params->instrs, var, &idx, coords); -+ -+ if (!(load = hlsl_new_var_load(ctx, var, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, &load->node); -+ -+ coords = &load->node; -+ -+ dim = HLSL_SAMPLER_DIM_2D; -+ } - - load_params.coords = coords; - load_params.resource = params->args[0]; -@@ -3335,10 +3501,16 @@ 
static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(params->instrs, &load->entry); -+ hlsl_block_add_instr(params->instrs, load); - return true; - } - -+static bool intrinsic_tex1D(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex1D", HLSL_SAMPLER_DIM_1D); -+} -+ - static bool intrinsic_tex2D(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3351,6 +3523,12 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); - } - -+static bool intrinsic_texCUBE(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); -+} -+ - static bool intrinsic_transpose(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3369,7 +3547,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - - if ((string = hlsl_type_to_string(ctx, arg_type))) - hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.\n", -+ "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.", - string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; -@@ -3377,7 +3555,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - - if (arg_type->class == HLSL_CLASS_SCALAR) - { -- list_add_tail(params->instrs, &arg->entry); -+ hlsl_block_add_instr(params->instrs, arg); - return true; - } - -@@ -3393,18 +3571,18 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - { - struct hlsl_block block; - -- if (!(load = 
add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) - return false; - - if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) - return false; -- list_move_tail(params->instrs, &block.instrs); -+ hlsl_block_add_block(params->instrs, &block); - } - } - - if (!(var_load = hlsl_new_var_load(ctx, var, loc))) - return false; -- list_add_tail(params->instrs, &var_load->node.entry); -+ hlsl_block_add_instr(params->instrs, &var_load->node); - - return true; - } -@@ -3444,13 +3622,13 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - - if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) - return false; -- list_add_tail(params->instrs, &c->entry); -+ hlsl_block_add_instr(params->instrs, c); - - if (arg_type->class == HLSL_CLASS_VECTOR) - { - if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) - return false; -- list_add_tail(params->instrs, &swizzle->entry); -+ hlsl_block_add_instr(params->instrs, swizzle); - - arg = swizzle; - } -@@ -3458,7 +3636,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) - return false; - -- if (ctx->profile->major_version >= 4) -+ if (shader_profile_version_ge(ctx, 4, 0)) - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); - - return true; -@@ -3482,10 +3660,15 @@ intrinsic_functions[] = - {"asfloat", 1, true, intrinsic_asfloat}, - {"asuint", -1, true, intrinsic_asuint}, - {"clamp", 3, true, intrinsic_clamp}, -+ {"clip", 1, true, intrinsic_clip}, - {"cos", 1, true, intrinsic_cos}, - {"cross", 2, true, intrinsic_cross}, - {"ddx", 1, true, intrinsic_ddx}, -+ {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, -+ {"ddx_fine", 1, true, intrinsic_ddx_fine}, - {"ddy", 1, true, 
intrinsic_ddy}, -+ {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, -+ {"ddy_fine", 1, true, intrinsic_ddy_fine}, - {"distance", 2, true, intrinsic_distance}, - {"dot", 2, true, intrinsic_dot}, - {"exp", 1, true, intrinsic_exp}, -@@ -3514,8 +3697,10 @@ intrinsic_functions[] = - {"smoothstep", 3, true, intrinsic_smoothstep}, - {"sqrt", 1, true, intrinsic_sqrt}, - {"step", 2, true, intrinsic_step}, -+ {"tex1D", -1, false, intrinsic_tex1D}, - {"tex2D", -1, false, intrinsic_tex2D}, - {"tex3D", -1, false, intrinsic_tex3D}, -+ {"texCUBE", -1, false, intrinsic_texCUBE}, - {"transpose", 1, true, intrinsic_transpose}, - {"trunc", 1, true, intrinsic_trunc}, - }; -@@ -3527,7 +3712,7 @@ static int intrinsic_function_name_compare(const void *a, const void *b) - return strcmp(a, func->name); - } - --static struct list *add_call(struct hlsl_ctx *ctx, const char *name, -+static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, - struct parse_initializer *args, const struct vkd3d_shader_location *loc) - { - struct intrinsic_function *intrinsic; -@@ -3535,79 +3720,8 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - - if ((decl = find_function_call(ctx, name, args, loc))) - { -- struct hlsl_ir_node *call; -- unsigned int i; -- -- assert(args->args_count == decl->parameters.count); -- -- for (i = 0; i < decl->parameters.count; ++i) -- { -- struct hlsl_ir_var *param = decl->parameters.vars[i]; -- struct hlsl_ir_node *arg = args->args[i]; -- -- if (!hlsl_types_are_equal(arg->data_type, param->data_type)) -- { -- struct hlsl_ir_node *cast; -- -- if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) -- goto fail; -- args->args[i] = cast; -- arg = cast; -- } -- -- if (param->storage_modifiers & HLSL_STORAGE_IN) -- { -- struct hlsl_ir_node *store; -- -- if (!(store = hlsl_new_simple_store(ctx, param, arg))) -- goto fail; -- list_add_tail(args->instrs, &store->entry); -- } -- } -- -- if (!(call = hlsl_new_call(ctx, decl, loc))) 
-+ if (!add_user_call(ctx, decl, args, loc)) - goto fail; -- list_add_tail(args->instrs, &call->entry); -- -- for (i = 0; i < decl->parameters.count; ++i) -- { -- struct hlsl_ir_var *param = decl->parameters.vars[i]; -- struct hlsl_ir_node *arg = args->args[i]; -- -- if (param->storage_modifiers & HLSL_STORAGE_OUT) -- { -- struct hlsl_ir_load *load; -- -- if (arg->data_type->modifiers & HLSL_MODIFIER_CONST) -- hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, -- "Output argument to \"%s\" is const.", decl->func->name); -- -- if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) -- goto fail; -- list_add_tail(args->instrs, &load->node.entry); -- -- if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) -- goto fail; -- } -- } -- -- if (decl->return_var) -- { -- struct hlsl_ir_load *load; -- -- if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) -- goto fail; -- list_add_tail(args->instrs, &load->node.entry); -- } -- else -- { -- struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -- struct hlsl_ir_node *expr; -- -- if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc))) -- goto fail; -- list_add_tail(args->instrs, &expr->entry); -- } - } - else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), - sizeof(*intrinsic_functions), intrinsic_function_name_compare))) -@@ -3662,7 +3776,7 @@ fail: - return NULL; - } - --static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, -+static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, - struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_load *load; -@@ -3692,7 +3806,7 @@ static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -- list_add_tail(params->instrs, &load->node.entry); -+ hlsl_block_add_instr(params->instrs, 
&load->node); - - vkd3d_free(params->args); - return params->instrs; -@@ -3733,7 +3847,7 @@ static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct - return false; - } - --static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -3761,7 +3875,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru - } - if (multisampled) - { -- if (!(load_params.sample_index = add_implicit_conversion(ctx, instrs, params->args[1], -+ if (!(load_params.sample_index = add_implicit_conversion(ctx, block, params->args[1], - hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) - return false; - } -@@ -3769,7 +3883,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru - assert(offset_dim); - if (params->args_count > 1 + multisampled) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[1 + multisampled], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -@@ -3779,7 +3893,7 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru - } - - /* +1 for the mipmap level for non-multisampled textures */ -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc))) - return false; - -@@ -3788,11 +3902,11 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, stru - - if (!(load = 
hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(instrs, &load->entry); -+ hlsl_block_add_instr(block, load); - return true; - } - --static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -3829,13 +3943,13 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, st - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - return false; - - if (offset_dim && params->args_count > 2) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -@@ -3851,12 +3965,12 @@ static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, st - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(instrs, &load->entry); -+ hlsl_block_add_instr(block, load); - - return true; - } - --static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -3899,17 +4013,17 @@ static bool add_sample_cmp_method_call(struct 
hlsl_ctx *ctx, struct list *instrs - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - return false; - -- if (!(load_params.cmp = add_implicit_conversion(ctx, instrs, params->args[2], -+ if (!(load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], - hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) - load_params.cmp = params->args[2]; - - if (offset_dim && params->args_count > 3) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -@@ -3925,12 +4039,12 @@ static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(instrs, &load->entry); -+ hlsl_block_add_instr(block, load); - - return true; - } - --static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -3997,7 +4111,7 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st - } - else if (offset_dim && params->args_count > 2) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -@@ -4022,7 +4136,7 @@ static 
bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - return false; - -@@ -4032,11 +4146,187 @@ static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, st - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(instrs, &load->entry); -+ hlsl_block_add_instr(block, load); -+ return true; -+} -+ -+static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *dest, -+ struct hlsl_ir_node *src, unsigned int component, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *load; -+ -+ if (!dest) -+ return true; -+ -+ if (!(load = hlsl_add_load_component(ctx, instrs, src, component, loc))) -+ return false; -+ -+ if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load)) -+ return false; -+ -+ return true; -+} -+ -+static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ const struct hlsl_type *object_type = object->data_type; -+ bool uint_resinfo, has_uint_arg, has_float_arg; -+ struct hlsl_resource_load_params load_params; -+ struct hlsl_ir_node *sample_info, *res_info; -+ struct hlsl_ir_node *zero = NULL, *void_ret; -+ struct hlsl_type *uint_type, *float_type; -+ unsigned int i, j; -+ enum func_argument -+ { -+ ARG_MIP_LEVEL, -+ ARG_WIDTH, -+ ARG_HEIGHT, -+ ARG_ELEMENT_COUNT, -+ ARG_LEVEL_COUNT, -+ ARG_SAMPLE_COUNT, -+ ARG_MAX_ARGS, -+ }; -+ struct hlsl_ir_node *args[ARG_MAX_ARGS] = { 0 }; -+ static const struct overload -+ { -+ enum hlsl_sampler_dim sampler_dim; -+ unsigned int args_count; -+ enum 
func_argument args[ARG_MAX_ARGS]; -+ } -+ overloads[] = -+ { -+ { HLSL_SAMPLER_DIM_1D, 1, { ARG_WIDTH } }, -+ { HLSL_SAMPLER_DIM_1D, 3, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_1DARRAY, 2, { ARG_WIDTH, ARG_ELEMENT_COUNT } }, -+ { HLSL_SAMPLER_DIM_1DARRAY, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_2D, 2, { ARG_WIDTH, ARG_HEIGHT } }, -+ { HLSL_SAMPLER_DIM_2D, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_2DARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, -+ { HLSL_SAMPLER_DIM_2DARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_3D, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, -+ { HLSL_SAMPLER_DIM_3D, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_CUBE, 2, { ARG_WIDTH, ARG_HEIGHT } }, -+ { HLSL_SAMPLER_DIM_CUBE, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_CUBEARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, -+ { HLSL_SAMPLER_DIM_CUBEARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_2DMS, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_SAMPLE_COUNT } }, -+ { HLSL_SAMPLER_DIM_2DMSARRAY, 4, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_SAMPLE_COUNT } }, -+ }; -+ const struct overload *o = NULL; -+ -+ if (object_type->sampler_dim > HLSL_SAMPLER_DIM_LAST_TEXTURE) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "GetDimensions() is not defined for this type."); -+ } -+ -+ uint_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT); -+ float_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT); -+ has_uint_arg = has_float_arg = false; -+ for (i = 0; i < ARRAY_SIZE(overloads); ++i) -+ { -+ const struct overload *iter = &overloads[i]; -+ -+ if (iter->sampler_dim == object_type->sampler_dim && iter->args_count == 
params->args_count) -+ { -+ for (j = 0; j < params->args_count; ++j) -+ { -+ args[iter->args[j]] = params->args[j]; -+ -+ /* Input parameter. */ -+ if (iter->args[j] == ARG_MIP_LEVEL) -+ { -+ if (!(args[ARG_MIP_LEVEL] = add_implicit_conversion(ctx, block, args[ARG_MIP_LEVEL], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -+ { -+ return false; -+ } -+ -+ continue; -+ } -+ -+ has_float_arg |= hlsl_types_are_equal(params->args[j]->data_type, float_type); -+ has_uint_arg |= hlsl_types_are_equal(params->args[j]->data_type, uint_type); -+ -+ if (params->args[j]->data_type->class != HLSL_CLASS_SCALAR) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected scalar arguments."); -+ break; -+ } -+ } -+ o = iter; -+ break; -+ } -+ } -+ uint_resinfo = !has_float_arg && has_uint_arg; -+ -+ if (!o) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, object_type))) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Unexpected number of arguments %u for %s.%s().", params->args_count, string->buffer, name); -+ hlsl_release_string_buffer(ctx, string); -+ } -+ } -+ -+ if (!args[ARG_MIP_LEVEL]) -+ { -+ if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) -+ return false; -+ hlsl_block_add_instr(block, zero); -+ args[ARG_MIP_LEVEL] = zero; -+ } -+ -+ memset(&load_params, 0, sizeof(load_params)); -+ load_params.type = HLSL_RESOURCE_RESINFO; -+ load_params.resource = object; -+ load_params.lod = args[ARG_MIP_LEVEL]; -+ load_params.format = hlsl_get_vector_type(ctx, uint_resinfo ? 
HLSL_TYPE_UINT : HLSL_TYPE_FLOAT, 4); -+ -+ if (!(res_info = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, res_info); -+ -+ if (!add_assignment_from_component(ctx, block, args[ARG_WIDTH], res_info, 0, loc)) -+ return false; -+ -+ if (!add_assignment_from_component(ctx, block, args[ARG_HEIGHT], res_info, 1, loc)) -+ return false; -+ -+ if (!add_assignment_from_component(ctx, block, args[ARG_ELEMENT_COUNT], res_info, -+ object_type->sampler_dim == HLSL_SAMPLER_DIM_1DARRAY ? 1 : 2, loc)) -+ { -+ return false; -+ } -+ -+ if (!add_assignment_from_component(ctx, block, args[ARG_LEVEL_COUNT], res_info, 3, loc)) -+ return false; -+ -+ if (args[ARG_SAMPLE_COUNT]) -+ { -+ memset(&load_params, 0, sizeof(load_params)); -+ load_params.type = HLSL_RESOURCE_SAMPLE_INFO; -+ load_params.resource = object; -+ load_params.format = args[ARG_SAMPLE_COUNT]->data_type; -+ if (!(sample_info = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, sample_info); -+ -+ if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info)) -+ return false; -+ } -+ -+ if (!(void_ret = hlsl_new_void_expr(ctx, loc))) -+ return false; -+ hlsl_block_add_instr(block, void_ret); -+ - return true; - } - --static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -4078,17 +4368,17 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], - 
hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - load_params.coords = params->args[1]; - -- if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], -+ if (!(load_params.lod = add_implicit_conversion(ctx, block, params->args[2], - hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) - load_params.lod = params->args[2]; - - if (offset_dim && params->args_count > 3) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -@@ -4102,11 +4392,11 @@ static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(instrs, &load->entry); -+ hlsl_block_add_instr(block, load); - return true; - } - --static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -@@ -4145,21 +4435,21 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instr - return false; - } - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - load_params.coords = params->args[1]; - -- if (!(load_params.ddx = add_implicit_conversion(ctx, instrs, params->args[2], -+ if (!(load_params.ddx = add_implicit_conversion(ctx, block, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - 
load_params.ddx = params->args[2]; - -- if (!(load_params.ddy = add_implicit_conversion(ctx, instrs, params->args[3], -+ if (!(load_params.ddy = add_implicit_conversion(ctx, block, params->args[3], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - load_params.ddy = params->args[3]; - - if (offset_dim && params->args_count > 4) - { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[4], -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[4], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } -@@ -4173,14 +4463,14 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instr - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(instrs, &load->entry); -+ hlsl_block_add_instr(block, load); - return true; - } - - static const struct method_function - { - const char *name; -- bool (*handler)(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+ bool (*handler)(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); - } - object_methods[] = -@@ -4191,6 +4481,8 @@ object_methods[] = - { "GatherGreen", add_gather_method_call }, - { "GatherRed", add_gather_method_call }, - -+ { "GetDimensions", add_getdimensions_method_call }, -+ - { "Load", add_load_method_call }, - - { "Sample", add_sample_method_call }, -@@ -4208,7 +4500,7 @@ static int object_method_function_name_compare(const void *a, const void *b) - return strcmp(a, func->name); - } - --static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - 
const struct hlsl_type *object_type = object->data_type; -@@ -4229,7 +4521,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl - if ((method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), - sizeof(*method), object_method_function_name_compare))) - { -- return method->handler(ctx, instrs, object, name, params, loc); -+ return method->handler(ctx, block, object, name, params, loc); - } - else - { -@@ -4272,6 +4564,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - char *name; - DWORD modifiers; - struct hlsl_ir_node *instr; -+ struct hlsl_block *block; - struct list *list; - struct parse_fields fields; - struct parse_function function; -@@ -4399,38 +4692,9 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %token C_INTEGER - %token PRE_LINE - --%type add_expr --%type assignment_expr --%type bitand_expr --%type bitor_expr --%type bitxor_expr --%type compound_statement --%type conditional_expr --%type declaration --%type declaration_statement --%type discard_statement --%type equality_expr --%type expr --%type expr_optional --%type expr_statement --%type initializer_expr --%type jump_statement --%type logicand_expr --%type logicor_expr --%type loop_statement --%type mul_expr --%type postfix_expr --%type primary_expr --%type relational_expr --%type selection_statement --%type shift_expr --%type statement --%type statement_list --%type struct_declaration - %type type_specs --%type unary_expr - %type variables_def --%type variables_def_optional -+%type variables_def_typed - - %token VAR_IDENTIFIER - %token NEW_IDENTIFIER -@@ -4446,6 +4710,35 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %type attribute_list - %type attribute_list_optional - -+%type add_expr -+%type assignment_expr -+%type bitand_expr -+%type bitor_expr -+%type bitxor_expr -+%type compound_statement -+%type conditional_expr -+%type declaration -+%type 
declaration_statement -+%type equality_expr -+%type expr -+%type expr_optional -+%type expr_statement -+%type initializer_expr -+%type jump_statement -+%type logicand_expr -+%type logicor_expr -+%type loop_statement -+%type mul_expr -+%type postfix_expr -+%type primary_expr -+%type relational_expr -+%type shift_expr -+%type selection_statement -+%type statement -+%type statement_list -+%type struct_declaration_without_vars -+%type unary_expr -+ - %type boolean - - %type buffer_type -@@ -4493,6 +4786,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %type type_spec - %type variable_decl - %type variable_def -+%type variable_def_typed - - %% - -@@ -4502,9 +4796,9 @@ hlsl_prog: - | hlsl_prog buffer_declaration buffer_body - | hlsl_prog declaration_statement - { -- if (!list_empty($2)) -+ if (!list_empty(&$2->instrs)) - hlsl_fixme(ctx, &@2, "Uniform initializer."); -- destroy_instr_list($2); -+ destroy_block($2); - } - | hlsl_prog preproc_directive - | hlsl_prog ';' -@@ -4561,25 +4855,19 @@ preproc_directive: - } - } - --struct_declaration: -- var_modifiers struct_spec variables_def_optional ';' -+struct_declaration_without_vars: -+ var_modifiers struct_spec ';' - { -- struct hlsl_type *type; -- unsigned int modifiers = $1; -+ if (!$2->name) -+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Anonymous struct type must declare a variable."); - -- if (!$3) -- { -- if (!$2->name) -- hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "Anonymous struct type must declare a variable."); -- if (modifiers) -- hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Modifiers are not allowed on struct type declarations."); -- } -+ if ($1) -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Modifiers are not allowed on struct type declarations."); - -- if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; -- $$ = 
declare_vars(ctx, type, modifiers, &@1, $3); - } - - struct_spec: -@@ -4686,7 +4974,7 @@ attribute: - YYABORT; - } - $$->name = $2; -- list_init(&$$->instrs); -+ hlsl_block_init(&$$->instrs); - $$->loc = @$; - $$->args_count = 0; - } -@@ -4701,8 +4989,8 @@ attribute: - YYABORT; - } - $$->name = $2; -- list_init(&$$->instrs); -- list_move_tail(&$$->instrs, $4.instrs); -+ hlsl_block_init(&$$->instrs); -+ hlsl_block_add_block(&$$->instrs, $4.instrs); - vkd3d_free($4.instrs); - $$->loc = @$; - $$->args_count = $4.args_count; -@@ -4758,15 +5046,15 @@ func_declaration: - "Function \"%s\" is already defined.", decl->func->name); - hlsl_note(ctx, &decl->loc, VKD3D_SHADER_LOG_ERROR, - "\"%s\" was previously defined here.", decl->func->name); -- hlsl_free_instr_list($2); -+ destroy_block($2); - } - else - { - size_t i; - - decl->has_body = true; -- list_move_tail(&decl->body.instrs, $2); -- vkd3d_free($2); -+ hlsl_block_add_block(&decl->body, $2); -+ destroy_block($2); - - /* Semantics are taken from whichever definition has a body. - * We can't just replace the hlsl_ir_var pointers, though: if -@@ -4817,6 +5105,9 @@ func_prototype_no_attrs: - struct hlsl_ir_var *var; - struct hlsl_type *type; - -+ /* Functions are unconditionally inlined. 
*/ -+ modifiers &= ~HLSL_MODIFIER_INLINE; -+ - if (modifiers & ~HLSL_MODIFIERS_MAJORITY_MASK) - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Only majority modifiers are allowed on functions."); -@@ -4943,7 +5234,7 @@ func_prototype: - compound_statement: - '{' '}' - { -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; - } - | '{' scope_start statement_list '}' -@@ -5261,7 +5552,12 @@ type_no_void: - { - validate_texture_format_type(ctx, $3, &@3); - -- /* TODO: unspecified sample count is not allowed for all targets */ -+ if (shader_profile_version_lt(ctx, 4, 1)) -+ { -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); -+ } -+ - $$ = hlsl_new_texture_type(ctx, $1, $3, 0); - } - | texture_ms_type '<' type ',' shift_expr '>' -@@ -5270,7 +5566,7 @@ type_no_void: - struct hlsl_block block; - - hlsl_block_init(&block); -- list_move_tail(&block.instrs, $5); -+ hlsl_block_add_block(&block, $5); - - sample_count = evaluate_static_expression_as_uint(ctx, &block, &@5); - -@@ -5325,7 +5621,7 @@ type_no_void: - $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); - if ($$->is_minimum_precision) - { -- if (ctx->profile->major_version < 4) -+ if (shader_profile_version_lt(ctx, 4, 0)) - { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Target profile doesn't support minimum-precision types."); -@@ -5354,10 +5650,10 @@ type: - - declaration_statement: - declaration -- | struct_declaration -+ | struct_declaration_without_vars - | typedef - { -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; - } - -@@ -5416,22 +5712,11 @@ type_spec: - } - - declaration: -- var_modifiers type variables_def ';' -+ variables_def_typed ';' - { -- struct hlsl_type *type; -- unsigned int modifiers = $1; -- -- if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) -+ if (!($$ = 
initialize_vars(ctx, $1))) - YYABORT; -- $$ = declare_vars(ctx, type, modifiers, &@1, $3); -- } -- --variables_def_optional: -- %empty -- { -- $$ = NULL; - } -- | variables_def - - variables_def: - variable_def -@@ -5446,6 +5731,33 @@ variables_def: - list_add_tail($$, &$3->entry); - } - -+variables_def_typed: -+ variable_def_typed -+ { -+ if (!($$ = make_empty_list(ctx))) -+ YYABORT; -+ list_add_head($$, &$1->entry); -+ -+ declare_var(ctx, $1); -+ } -+ | variables_def_typed ',' variable_def -+ { -+ struct parse_variable_def *head_def; -+ -+ assert(!list_empty($1)); -+ head_def = LIST_ENTRY(list_head($1), struct parse_variable_def, entry); -+ -+ assert(head_def->basic_type); -+ $3->basic_type = head_def->basic_type; -+ $3->modifiers = head_def->modifiers; -+ $3->modifiers_loc = head_def->modifiers_loc; -+ -+ declare_var(ctx, $3); -+ -+ $$ = $1; -+ list_add_tail($$, &$3->entry); -+ } -+ - variable_decl: - any_identifier arrays colon_attribute - { -@@ -5461,7 +5773,7 @@ state: - any_identifier '=' expr ';' - { - vkd3d_free($1); -- hlsl_free_instr_list($3); -+ destroy_block($3); - } - - state_block_start: -@@ -5487,6 +5799,38 @@ variable_def: - ctx->in_state_block = 0; - } - -+variable_def_typed: -+ var_modifiers struct_spec variable_def -+ { -+ unsigned int modifiers = $1; -+ struct hlsl_type *type; -+ -+ if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) -+ YYABORT; -+ -+ check_invalid_in_out_modifiers(ctx, modifiers, &@1); -+ -+ $$ = $3; -+ $$->basic_type = type; -+ $$->modifiers = modifiers; -+ $$->modifiers_loc = @1; -+ } -+ | var_modifiers type variable_def -+ { -+ unsigned int modifiers = $1; -+ struct hlsl_type *type; -+ -+ if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) -+ YYABORT; -+ -+ check_invalid_in_out_modifiers(ctx, modifiers, &@1); -+ -+ $$ = $3; -+ $$->basic_type = type; -+ $$->modifiers = modifiers; -+ $$->modifiers_loc = @1; -+ } -+ - arrays: - %empty - { -@@ -5495,17 +5839,12 @@ arrays: - } - | '[' expr ']' 
arrays - { -- struct hlsl_block block; - uint32_t *new_array; - unsigned int size; - -- hlsl_clone_block(ctx, &block, &ctx->static_initializers); -- list_move_tail(&block.instrs, $2); -- -- size = evaluate_static_expression_as_uint(ctx, &block, &@2); -+ size = evaluate_static_expression_as_uint(ctx, $2, &@2); - -- hlsl_block_cleanup(&block); -- vkd3d_free($2); -+ destroy_block($2); - - $$ = $4; - -@@ -5610,6 +5949,10 @@ var_modifiers: - { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, &@1); - } -+ | KW_INLINE var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); -+ } - - - complex_initializer: -@@ -5618,10 +5961,10 @@ complex_initializer: - $$.args_count = 1; - if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) - { -- destroy_instr_list($1); -+ destroy_block($1); - YYABORT; - } -- $$.args[0] = node_from_list($1); -+ $$.args[0] = node_from_block($1); - $$.instrs = $1; - $$.braces = false; - } -@@ -5653,7 +5996,7 @@ complex_initializer_list: - $$.args = new_args; - for (i = 0; i < $3.args_count; ++i) - $$.args[$$.args_count++] = $3.args[i]; -- list_move_tail($$.instrs, $3.instrs); -+ hlsl_block_add_block($$.instrs, $3.instrs); - free_parse_initializer(&$3); - } - -@@ -5666,10 +6009,10 @@ initializer_expr_list: - $$.args_count = 1; - if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) - { -- destroy_instr_list($1); -+ destroy_block($1); - YYABORT; - } -- $$.args[0] = node_from_list($1); -+ $$.args[0] = node_from_block($1); - $$.instrs = $1; - $$.braces = false; - } -@@ -5681,13 +6024,13 @@ initializer_expr_list: - if (!(new_args = hlsl_realloc(ctx, $$.args, ($$.args_count + 1) * sizeof(*$$.args)))) - { - free_parse_initializer(&$$); -- destroy_instr_list($3); -+ destroy_block($3); - YYABORT; - } - $$.args = new_args; -- $$.args[$$.args_count++] = node_from_list($3); -- list_move_tail($$.instrs, $3); -- vkd3d_free($3); -+ $$.args[$$.args_count++] = node_from_block($3); -+ hlsl_block_add_block($$.instrs, $3); -+ 
destroy_block($3); - } - - boolean: -@@ -5705,15 +6048,14 @@ statement_list: - | statement_list statement - { - $$ = $1; -- list_move_tail($$, $2); -- vkd3d_free($2); -+ hlsl_block_add_block($$, $2); -+ destroy_block($2); - } - - statement: - declaration_statement - | expr_statement - | compound_statement -- | discard_statement - | jump_statement - | selection_statement - | loop_statement -@@ -5721,47 +6063,67 @@ statement: - jump_statement: - KW_RETURN expr ';' - { -- if (!add_return(ctx, $2, node_from_list($2), &@1)) -- YYABORT; - $$ = $2; -+ if (!add_return(ctx, $$, node_from_block($$), &@1)) -+ YYABORT; - } - | KW_RETURN ';' - { -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; - if (!add_return(ctx, $$, NULL, &@1)) - YYABORT; - } -- --discard_statement: -- KW_DISCARD ';' -+ | KW_DISCARD ';' - { -- struct hlsl_ir_node *discard; -+ struct hlsl_ir_node *discard, *c; - -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; -- if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) -+ -+ if (!(c = hlsl_new_uint_constant(ctx, ~0u, &@1))) -+ return false; -+ hlsl_block_add_instr($$, c); -+ -+ if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NZ, c, &@1))) - return false; -- list_add_tail($$, &discard->entry); -+ hlsl_block_add_instr($$, discard); - } - - selection_statement: -- KW_IF '(' expr ')' if_body -+ attribute_list_optional KW_IF '(' expr ')' if_body - { -- struct hlsl_ir_node *condition = node_from_list($3); -- struct hlsl_block then_block, else_block; -+ struct hlsl_ir_node *condition = node_from_block($4); -+ const struct parse_attribute_list *attributes = &$1; - struct hlsl_ir_node *instr; -+ unsigned int i; -+ -+ if (attribute_list_has_duplicates(attributes)) -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); -+ -+ for (i = 0; i < attributes->count; ++i) -+ { -+ const struct hlsl_attribute *attr = attributes->attrs[i]; - -- 
hlsl_block_init(&then_block); -- list_move_tail(&then_block.instrs, $5.then_block); -- hlsl_block_init(&else_block); -- if ($5.else_block) -- list_move_tail(&else_block.instrs, $5.else_block); -- vkd3d_free($5.then_block); -- vkd3d_free($5.else_block); -+ if (!strcmp(attr->name, "branch") -+ || !strcmp(attr->name, "flatten")) -+ { -+ hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, "Unhandled attribute '%s'.", attr->name); -+ } -+ else -+ { -+ hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, "Unrecognized attribute '%s'.", attr->name); -+ } -+ } - -- if (!(instr = hlsl_new_if(ctx, condition, &then_block, &else_block, &@1))) -+ if (!(instr = hlsl_new_if(ctx, condition, $6.then_block, $6.else_block, &@2))) -+ { -+ destroy_block($6.then_block); -+ destroy_block($6.else_block); - YYABORT; -+ } -+ destroy_block($6.then_block); -+ destroy_block($6.else_block); - if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) - { - struct vkd3d_string_buffer *string; -@@ -5771,8 +6133,8 @@ selection_statement: - "if condition type %s is not scalar.", string->buffer); - hlsl_release_string_buffer(ctx, string); - } -- $$ = $3; -- list_add_tail($$, &instr->entry); -+ $$ = $4; -+ hlsl_block_add_instr($$, instr); - } - - if_body: -@@ -5810,7 +6172,7 @@ loop_statement: - expr_optional: - %empty - { -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; - } - | expr -@@ -5826,7 +6188,7 @@ func_arguments: - { - $$.args = NULL; - $$.args_count = 0; -- if (!($$.instrs = make_empty_list(ctx))) -+ if (!($$.instrs = make_empty_block(ctx))) - YYABORT; - $$.braces = false; - } -@@ -5839,7 +6201,7 @@ primary_expr: - - if (!(c = hlsl_new_float_constant(ctx, $1, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, c))) -+ if (!($$ = make_block(ctx, c))) - YYABORT; - } - | C_INTEGER -@@ -5848,7 +6210,7 @@ primary_expr: - - if (!(c = hlsl_new_int_constant(ctx, $1, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, c))) -+ if 
(!($$ = make_block(ctx, c))) - YYABORT; - } - | boolean -@@ -5857,7 +6219,7 @@ primary_expr: - - if (!(c = hlsl_new_bool_constant(ctx, $1, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, c))) -+ if (!($$ = make_block(ctx, c))) - { - hlsl_free_instr(c); - YYABORT; -@@ -5875,7 +6237,7 @@ primary_expr: - } - if (!(load = hlsl_new_var_load(ctx, var, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, &load->node))) -+ if (!($$ = make_block(ctx, &load->node))) - YYABORT; - } - | '(' expr ')' -@@ -5903,7 +6265,7 @@ primary_expr: - YYABORT; - if (!(load = hlsl_new_var_load(ctx, var, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, &load->node))) -+ if (!($$ = make_block(ctx, &load->node))) - YYABORT; - } - else -@@ -5919,7 +6281,7 @@ postfix_expr: - { - if (!add_increment(ctx, $1, false, true, &@2)) - { -- destroy_instr_list($1); -+ destroy_block($1); - YYABORT; - } - $$ = $1; -@@ -5928,14 +6290,14 @@ postfix_expr: - { - if (!add_increment(ctx, $1, true, true, &@2)) - { -- destroy_instr_list($1); -+ destroy_block($1); - YYABORT; - } - $$ = $1; - } - | postfix_expr '.' 
any_identifier - { -- struct hlsl_ir_node *node = node_from_list($1); -+ struct hlsl_ir_node *node = node_from_block($1); - - if (node->data_type->class == HLSL_CLASS_STRUCT) - { -@@ -5963,7 +6325,7 @@ postfix_expr: - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle \"%s\".", $3); - YYABORT; - } -- list_add_tail($1, &swizzle->entry); -+ hlsl_block_add_instr($1, swizzle); - $$ = $1; - } - else -@@ -5974,17 +6336,17 @@ postfix_expr: - } - | postfix_expr '[' expr ']' - { -- struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); -+ struct hlsl_ir_node *array = node_from_block($1), *index = node_from_block($3); - -- list_move_head($1, $3); -- vkd3d_free($3); -+ hlsl_block_add_block($3, $1); -+ destroy_block($1); - -- if (!add_array_access(ctx, $1, array, index, &@2)) -+ if (!add_array_access(ctx, $3, array, index, &@2)) - { -- destroy_instr_list($1); -+ destroy_block($3); - YYABORT; - } -- $$ = $1; -+ $$ = $3; - } - - /* var_modifiers is necessary to avoid shift/reduce conflicts. */ -@@ -6025,14 +6387,14 @@ postfix_expr: - } - | postfix_expr '.' 
any_identifier '(' func_arguments ')' - { -- struct hlsl_ir_node *object = node_from_list($1); -+ struct hlsl_ir_node *object = node_from_block($1); - -- list_move_tail($1, $5.instrs); -+ hlsl_block_add_block($1, $5.instrs); - vkd3d_free($5.instrs); - - if (!add_method_call(ctx, $1, object, $3, &$5, &@3)) - { -- hlsl_free_instr_list($1); -+ destroy_block($1); - vkd3d_free($5.args); - YYABORT; - } -@@ -6046,7 +6408,7 @@ unary_expr: - { - if (!add_increment(ctx, $2, false, false, &@1)) - { -- destroy_instr_list($2); -+ destroy_block($2); - YYABORT; - } - $$ = $2; -@@ -6055,7 +6417,7 @@ unary_expr: - { - if (!add_increment(ctx, $2, true, false, &@1)) - { -- destroy_instr_list($2); -+ destroy_block($2); - YYABORT; - } - $$ = $2; -@@ -6066,23 +6428,23 @@ unary_expr: - } - | '-' unary_expr - { -- add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_list($2), &@1); -+ add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_block($2), &@1); - $$ = $2; - } - | '~' unary_expr - { -- add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_list($2), &@1); -+ add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_block($2), &@1); - $$ = $2; - } - | '!' unary_expr - { -- add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_list($2), &@1); -+ add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_block($2), &@1); - $$ = $2; - } - /* var_modifiers is necessary to avoid shift/reduce conflicts. 
*/ - | '(' var_modifiers type arrays ')' unary_expr - { -- struct hlsl_type *src_type = node_from_list($6)->data_type; -+ struct hlsl_type *src_type = node_from_block($6)->data_type; - struct hlsl_type *dst_type; - unsigned int i; - -@@ -6118,9 +6480,9 @@ unary_expr: - YYABORT; - } - -- if (!add_cast(ctx, $6, node_from_list($6), dst_type, &@3)) -+ if (!add_cast(ctx, $6, node_from_block($6), dst_type, &@3)) - { -- hlsl_free_instr_list($6); -+ destroy_block($6); - YYABORT; - } - $$ = $6; -@@ -6130,120 +6492,122 @@ mul_expr: - unary_expr - | mul_expr '*' unary_expr - { -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); - } - | mul_expr '/' unary_expr - { -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); - } - | mul_expr '%' unary_expr - { -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); - } - - add_expr: - mul_expr - | add_expr '+' mul_expr - { -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); - } - | add_expr '-' mul_expr - { - struct hlsl_ir_node *neg; - -- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), &@2))) -+ if (!(neg = add_unary_arithmetic_expr(ctx, $3, HLSL_OP1_NEG, node_from_block($3), &@2))) - YYABORT; -- list_add_tail($3, &neg->entry); -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); - } - - shift_expr: - add_expr - | shift_expr OP_LEFTSHIFT add_expr - { -- $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); - } - | shift_expr OP_RIGHTSHIFT add_expr - { -- $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); 
-+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); - } - - relational_expr: - shift_expr - | relational_expr '<' shift_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); - } - | relational_expr '>' shift_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); -+ $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); - } - | relational_expr OP_LE shift_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); -+ $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); - } - | relational_expr OP_GE shift_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); - } - - equality_expr: - relational_expr - | equality_expr OP_EQ relational_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); - } - | equality_expr OP_NE relational_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); - } - - bitand_expr: - equality_expr - | bitand_expr '&' equality_expr - { -- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); - } - - bitxor_expr: - bitand_expr - | bitxor_expr '^' bitand_expr - { -- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); - } - - bitor_expr: - bitxor_expr - | bitor_expr '|' bitxor_expr - { -- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); - } - - logicand_expr: - bitor_expr - | logicand_expr OP_AND bitor_expr - { -- $$ = 
add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); - } - - logicor_expr: - logicand_expr - | logicor_expr OP_OR logicand_expr - { -- $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); - } - - conditional_expr: - logicor_expr - | logicor_expr '?' expr ':' assignment_expr - { -- struct hlsl_ir_node *cond = node_from_list($1), *first = node_from_list($3), *second = node_from_list($5); -+ struct hlsl_ir_node *cond = node_from_block($1); -+ struct hlsl_ir_node *first = node_from_block($3); -+ struct hlsl_ir_node *second = node_from_block($5); -+ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = { 0 }; - struct hlsl_type *common_type; - -- list_move_tail($1, $3); -- list_move_tail($1, $5); -- vkd3d_free($3); -- vkd3d_free($5); -+ hlsl_block_add_block($1, $3); -+ hlsl_block_add_block($1, $5); -+ destroy_block($3); -+ destroy_block($5); - - if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) - YYABORT; -@@ -6254,7 +6618,10 @@ conditional_expr: - if (!(second = add_implicit_conversion(ctx, $1, second, common_type, &@5))) - YYABORT; - -- if (!hlsl_add_conditional(ctx, $1, cond, first, second)) -+ args[0] = cond; -+ args[1] = first; -+ args[2] = second; -+ if (!add_expr(ctx, $1, HLSL_OP3_TERNARY, args, common_type, &@1)) - YYABORT; - $$ = $1; - } -@@ -6264,15 +6631,15 @@ assignment_expr: - conditional_expr - | unary_expr assign_op assignment_expr - { -- struct hlsl_ir_node *lhs = node_from_list($1), *rhs = node_from_list($3); -+ struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3); - - if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) - { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression."); - YYABORT; - } -- list_move_tail($3, $1); -- vkd3d_free($1); -+ hlsl_block_add_block($3, $1); -+ destroy_block($1); - if 
(!add_assignment(ctx, $3, lhs, $2, rhs)) - YYABORT; - $$ = $3; -@@ -6329,6 +6696,6 @@ expr: - | expr ',' assignment_expr - { - $$ = $1; -- list_move_tail($$, $3); -- vkd3d_free($3); -+ hlsl_block_add_block($$, $3); -+ destroy_block($3); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 765b1907426..be024842164 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -97,6 +97,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str - static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *deref, const struct vkd3d_shader_location *loc) - { -+ enum hlsl_regset regset = hlsl_type_get_regset(deref->data_type); - struct hlsl_ir_node *offset = NULL; - struct hlsl_type *type; - unsigned int i; -@@ -111,7 +112,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st - struct hlsl_block idx_block; - - if (!(offset = new_offset_from_path_index(ctx, &idx_block, type, offset, deref->path[i].node, -- deref->offset_regset, loc))) -+ regset, loc))) - return NULL; - - hlsl_block_add_block(block, &idx_block); -@@ -126,7 +127,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st - static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, - struct hlsl_ir_node *instr) - { -- const struct hlsl_type *type; -+ struct hlsl_type *type; - struct hlsl_ir_node *offset; - struct hlsl_block block; - -@@ -145,7 +146,7 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der - return true; - } - -- deref->offset_regset = hlsl_type_get_regset(type); -+ deref->data_type = type; - - if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) - return false; -@@ -160,12 +161,12 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx 
*ctx, struct hlsl_der - /* Split uniforms into two variables representing the constant and temp - * registers, and copy the former to the latter, so that writes to uniforms - * work. */ --static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *temp) -+static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *temp) - { -- struct vkd3d_string_buffer *name; - struct hlsl_ir_var *uniform; - struct hlsl_ir_node *store; - struct hlsl_ir_load *load; -+ char *new_name; - - /* Use the synthetic name for the temp, rather than the uniform, so that we - * can write the uniform name into the shader reflection data. */ -@@ -179,15 +180,13 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru - uniform->is_param = temp->is_param; - uniform->buffer = temp->buffer; - -- if (!(name = hlsl_get_string_buffer(ctx))) -+ if (!(new_name = hlsl_sprintf_alloc(ctx, "", temp->name))) - return; -- vkd3d_string_buffer_printf(name, "", temp->name); -- temp->name = hlsl_strdup(ctx, name->buffer); -- hlsl_release_string_buffer(ctx, name); -+ temp->name = new_name; - - if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) - return; -- list_add_head(instrs, &load->node.entry); -+ list_add_head(&block->instrs, &load->node.entry); - - if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) - return; -@@ -234,16 +233,15 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - uint32_t index, bool output, const struct vkd3d_shader_location *loc) - { - struct hlsl_semantic new_semantic; -- struct vkd3d_string_buffer *name; - struct hlsl_ir_var *ext_var; -+ char *new_name; - -- if (!(name = hlsl_get_string_buffer(ctx))) -+ if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? "output" : "input", semantic->name, index))) - return NULL; -- vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? 
"output" : "input", semantic->name, index); - - LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (!ascii_strcasecmp(ext_var->name, name->buffer)) -+ if (!ascii_strcasecmp(ext_var->name, new_name)) - { - if (output) - { -@@ -270,25 +268,23 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - } - } - -- hlsl_release_string_buffer(ctx, name); -+ vkd3d_free(new_name); - return ext_var; - } - } - - if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) - { -- hlsl_release_string_buffer(ctx, name); -+ vkd3d_free(new_name); - return NULL; - } - new_semantic.index = index; -- if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), type, loc, &new_semantic, -- modifiers, NULL))) -+ if (!(ext_var = hlsl_new_var(ctx, new_name, type, loc, &new_semantic, modifiers, NULL))) - { -- hlsl_release_string_buffer(ctx, name); -+ vkd3d_free(new_name); - hlsl_cleanup_semantic(&new_semantic); - return NULL; - } -- hlsl_release_string_buffer(ctx, name); - if (output) - ext_var->is_output_semantic = 1; - else -@@ -300,7 +296,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - return ext_var; - } - --static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, -+static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, - unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; -@@ -320,9 +316,10 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct - if (!semantic->name) - return; - -- vector_type_src = hlsl_get_vector_type(ctx, type->base_type, -- (ctx->profile->major_version < 4) ? 
4 : hlsl_type_minor_size(type)); - vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); -+ vector_type_src = vector_type_dst; -+ if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) -+ vector_type_src = hlsl_get_vector_type(ctx, type->base_type, 4); - - for (i = 0; i < hlsl_type_major_size(type); ++i) - { -@@ -363,7 +360,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct - } - } - --static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, -+static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, - unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct vkd3d_shader_location *loc = &lhs->node.loc; -@@ -405,30 +402,30 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs - return; - list_add_after(&c->entry, &element_load->node.entry); - -- prepend_input_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); -+ prepend_input_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); - } - } - else - { -- prepend_input_copy(ctx, instrs, lhs, modifiers, semantic, semantic_index); -+ prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); - } - } - - /* Split inputs into two variables representing the semantic and temp registers, - * and copy the former to the latter, so that writes to input variables work. */ --static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) -+static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) - { - struct hlsl_ir_load *load; - - /* This redundant load is expected to be deleted later by DCE. 
*/ - if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) - return; -- list_add_head(instrs, &load->node.entry); -+ list_add_head(&block->instrs, &load->node.entry); - -- prepend_input_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); -+ prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); - } - --static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, -+static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, - unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct hlsl_type *type = rhs->node.data_type, *vector_type; -@@ -463,11 +460,11 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct - { - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; -- list_add_tail(instrs, &c->entry); -+ hlsl_block_add_instr(block, c); - - if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) - return; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - } - else - { -@@ -475,16 +472,16 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct - - if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) - return; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - } - - if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) - return; -- list_add_tail(instrs, &store->entry); -+ hlsl_block_add_instr(block, store); - } - } - --static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, -+static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, - unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct vkd3d_shader_location *loc = &rhs->node.loc; 
-@@ -519,34 +516,34 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs - - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; -- list_add_tail(instrs, &c->entry); -+ hlsl_block_add_instr(block, c); - - if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) - return; -- list_add_tail(instrs, &element_load->node.entry); -+ hlsl_block_add_instr(block, &element_load->node); - -- append_output_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); -+ append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); - } - } - else - { -- append_output_copy(ctx, instrs, rhs, modifiers, semantic, semantic_index); -+ append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); - } - } - - /* Split outputs into two variables representing the temp and semantic - * registers, and copy the former to the latter, so that reads from output - * variables work. */ --static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) -+static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) - { - struct hlsl_ir_load *load; - - /* This redundant load is expected to be deleted later by DCE. 
*/ - if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) - return; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - -- append_output_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); -+ append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); - } - - bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), -@@ -573,6 +570,37 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, - return progress; - } - -+typedef bool (*PFN_lower_func)(struct hlsl_ctx *, struct hlsl_ir_node *, struct hlsl_block *); -+ -+static bool call_lower_func(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ PFN_lower_func func = context; -+ struct hlsl_block block; -+ -+ hlsl_block_init(&block); -+ if (func(ctx, instr, &block)) -+ { -+ struct hlsl_ir_node *replacement = LIST_ENTRY(list_tail(&block.instrs), struct hlsl_ir_node, entry); -+ -+ list_move_before(&instr->entry, &block.instrs); -+ hlsl_replace_node(instr, replacement); -+ return true; -+ } -+ else -+ { -+ hlsl_block_cleanup(&block); -+ return false; -+ } -+} -+ -+/* Specific form of transform_ir() for passes which convert a single instruction -+ * to a block of one or more instructions. This helper takes care of setting up -+ * the block and calling hlsl_replace_node_with_block(). 
*/ -+static bool lower_ir(struct hlsl_ctx *ctx, PFN_lower_func func, struct hlsl_block *block) -+{ -+ return hlsl_transform_ir(ctx, call_lower_func, block, func); -+} -+ - static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - bool res; -@@ -666,7 +694,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx, - return; - list_add_after(&cf_instr->entry, &load->node.entry); - -- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc))) - return; - hlsl_block_add_instr(&then_block, jump); - -@@ -906,6 +934,55 @@ static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct h - return &coords_load->node; - } - -+/* hlsl_ir_swizzle nodes that directly point to a matrix value are only a parse-time construct that -+ * represents matrix swizzles (e.g. mat._m01_m23) before we know if they will be used in the lhs of -+ * an assignment or as a value made from different components of the matrix. The former cases should -+ * have already been split into several separate assignments, but the latter are lowered by this -+ * pass. 
*/ -+static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_swizzle *swizzle; -+ struct hlsl_ir_load *var_load; -+ struct hlsl_deref var_deref; -+ struct hlsl_type *matrix_type; -+ struct hlsl_ir_var *var; -+ unsigned int x, y, k, i; -+ -+ if (instr->type != HLSL_IR_SWIZZLE) -+ return false; -+ swizzle = hlsl_ir_swizzle(instr); -+ matrix_type = swizzle->val.node->data_type; -+ if (matrix_type->class != HLSL_CLASS_MATRIX) -+ return false; -+ -+ if (!(var = hlsl_new_synthetic_var(ctx, "matrix-swizzle", instr->data_type, &instr->loc))) -+ return false; -+ hlsl_init_simple_deref_from_var(&var_deref, var); -+ -+ for (i = 0; i < instr->data_type->dimx; ++i) -+ { -+ struct hlsl_block store_block; -+ struct hlsl_ir_node *load; -+ -+ y = (swizzle->swizzle >> (8 * i + 4)) & 0xf; -+ x = (swizzle->swizzle >> 8 * i) & 0xf; -+ k = y * matrix_type->dimx + x; -+ -+ if (!(load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc))) -+ return false; -+ -+ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, load)) -+ return false; -+ hlsl_block_add_block(block, &store_block); -+ } -+ -+ if (!(var_load = hlsl_new_var_load(ctx, var, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, &var_load->node); -+ -+ return true; -+} -+ - /* hlsl_ir_index nodes are a parse-time construct used to represent array indexing and struct - * record access before knowing if they will be used in the lhs of an assignment --in which case - * they are lowered into a deref-- or as the load of an element within a larger value. 
-@@ -1689,7 +1766,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ - { - struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - -- if (!(load->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if (!load->resource.var->is_uniform) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Loaded resource must have a single uniform source."); -@@ -1704,7 +1781,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ - - if (load->sampler.var) - { -- if (!(load->sampler.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if (!load->sampler.var->is_uniform) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Resource load sampler must have a single uniform source."); -@@ -1722,7 +1799,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ - { - struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - -- if (!(store->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if (!store->resource.var->is_uniform) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Accessed resource must have a single uniform source."); -@@ -1889,7 +1966,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - - if (rhs->type != HLSL_IR_LOAD) - { -- hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type.\n"); -+ hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type."); - return false; - } - -@@ -2066,6 +2143,137 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir - return false; - } - -+/* Lower combined samples and sampler variables to synthesized separated textures and samplers. -+ * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. 
*/ -+static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_resource_load *load; -+ struct vkd3d_string_buffer *name; -+ struct hlsl_ir_var *var; -+ unsigned int i; -+ -+ if (instr->type != HLSL_IR_RESOURCE_LOAD) -+ return false; -+ load = hlsl_ir_resource_load(instr); -+ -+ switch (load->load_type) -+ { -+ case HLSL_RESOURCE_LOAD: -+ case HLSL_RESOURCE_GATHER_RED: -+ case HLSL_RESOURCE_GATHER_GREEN: -+ case HLSL_RESOURCE_GATHER_BLUE: -+ case HLSL_RESOURCE_GATHER_ALPHA: -+ case HLSL_RESOURCE_RESINFO: -+ case HLSL_RESOURCE_SAMPLE_CMP: -+ case HLSL_RESOURCE_SAMPLE_CMP_LZ: -+ case HLSL_RESOURCE_SAMPLE_GRAD: -+ case HLSL_RESOURCE_SAMPLE_INFO: -+ return false; -+ -+ case HLSL_RESOURCE_SAMPLE: -+ case HLSL_RESOURCE_SAMPLE_LOD: -+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -+ break; -+ } -+ if (load->sampler.var) -+ return false; -+ -+ if (!hlsl_type_is_resource(load->resource.var->data_type)) -+ { -+ hlsl_fixme(ctx, &instr->loc, "Lower combined samplers within structs."); -+ return false; -+ } -+ -+ assert(hlsl_type_get_regset(load->resource.var->data_type) == HLSL_REGSET_SAMPLERS); -+ -+ if (!(name = hlsl_get_string_buffer(ctx))) -+ return false; -+ vkd3d_string_buffer_printf(name, "%s", load->resource.var->name); -+ -+ TRACE("Lowering to separate resource %s.\n", debugstr_a(name->buffer)); -+ -+ if (!(var = hlsl_get_var(ctx->globals, name->buffer))) -+ { -+ struct hlsl_type *texture_array_type = hlsl_new_texture_type(ctx, load->sampling_dim, -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); -+ -+ /* Create (possibly multi-dimensional) texture array type with the same dims as the sampler array. 
*/ -+ struct hlsl_type *arr_type = load->resource.var->data_type; -+ for (i = 0; i < load->resource.path_len; ++i) -+ { -+ assert(arr_type->class == HLSL_CLASS_ARRAY); -+ texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); -+ arr_type = arr_type->e.array.type; -+ } -+ -+ if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, texture_array_type, &instr->loc, false))) -+ { -+ hlsl_release_string_buffer(ctx, name); -+ return false; -+ } -+ var->is_uniform = 1; -+ var->is_separated_resource = true; -+ -+ list_add_tail(&ctx->extern_vars, &var->extern_entry); -+ } -+ hlsl_release_string_buffer(ctx, name); -+ -+ if (load->sampling_dim != var->data_type->sampler_dim) -+ { -+ hlsl_error(ctx, &load->node.loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, -+ "Cannot split combined samplers from \"%s\" if they have different usage dimensions.", -+ load->resource.var->name); -+ hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, "First use as combined sampler is here."); -+ return false; -+ -+ } -+ -+ hlsl_copy_deref(ctx, &load->sampler, &load->resource); -+ load->resource.var = var; -+ assert(hlsl_deref_get_type(ctx, &load->resource)->base_type == HLSL_TYPE_TEXTURE); -+ assert(hlsl_deref_get_type(ctx, &load->sampler)->base_type == HLSL_TYPE_SAMPLER); -+ -+ return true; -+} -+ -+static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl_ir_var *to_add, -+ enum hlsl_regset regset) -+{ -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(var, list, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->bind_count[regset] < to_add->bind_count[regset]) -+ { -+ list_add_before(&var->extern_entry, &to_add->extern_entry); -+ return; -+ } -+ } -+ -+ list_add_tail(list, &to_add->extern_entry); -+} -+ -+static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) -+{ -+ struct list separated_resources; -+ struct hlsl_ir_var *var, *next; -+ -+ list_init(&separated_resources); -+ -+ 
LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_separated_resource) -+ { -+ list_remove(&var->extern_entry); -+ insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_TEXTURES); -+ } -+ } -+ -+ list_move_head(&ctx->extern_vars, &separated_resources); -+ -+ return false; -+} -+ - /* Lower DIV to RCP + MUL. */ - static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -@@ -2232,6 +2440,54 @@ static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * - return true; - } - -+/* Use 'movc' for the ternary operator. */ -+static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], *replacement; -+ struct hlsl_ir_node *zero, *cond, *first, *second; -+ struct hlsl_constant_value zero_value = { 0 }; -+ struct hlsl_ir_expr *expr; -+ struct hlsl_type *type; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP3_TERNARY) -+ return false; -+ -+ cond = expr->operands[0].node; -+ first = expr->operands[1].node; -+ second = expr->operands[2].node; -+ -+ if (cond->data_type->base_type == HLSL_TYPE_FLOAT) -+ { -+ if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc))) -+ return false; -+ list_add_tail(&instr->entry, &zero->entry); -+ -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = zero; -+ operands[1] = cond; -+ type = cond->data_type; -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); -+ if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &cond->entry); -+ } -+ -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = cond; -+ operands[1] = first; -+ operands[2] = second; -+ if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, 
first->data_type, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &replacement->entry); -+ -+ hlsl_replace_node(instr, replacement); -+ return true; -+} -+ - static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - struct hlsl_type *type = instr->data_type, *arg_type; -@@ -2264,7 +2520,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return true; - } - --struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, -+struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) - { - struct hlsl_block then_block, else_block; -@@ -2290,18 +2546,18 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *ins - - if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) - return NULL; -- list_add_tail(instrs, &iff->entry); -+ hlsl_block_add_instr(instrs, iff); - - if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) - return NULL; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(instrs, &load->node); - - return &load->node; - } - --static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -- struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; -+ struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; - struct hlsl_type *type = instr->data_type, *utype; - struct hlsl_constant_value high_bit_value; - struct hlsl_ir_expr *expr; -@@ -2322,56 +2578,52 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - - if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, 
arg1, arg2))) - return false; -- list_add_before(&instr->entry, &xor->entry); -+ hlsl_block_add_instr(block, xor); - - for (i = 0; i < type->dimx; ++i) - high_bit_value.u[i].u = 0x80000000; - if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) - return false; -- list_add_before(&instr->entry, &high_bit->entry); -+ hlsl_block_add_instr(block, high_bit); - - if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, high_bit))) - return false; -- list_add_before(&instr->entry, &and->entry); -+ hlsl_block_add_instr(block, and); - - if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) - return false; -- list_add_before(&instr->entry, &abs1->entry); -+ hlsl_block_add_instr(block, abs1); - - if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast1->entry); -+ hlsl_block_add_instr(block, cast1); - - if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) - return false; -- list_add_before(&instr->entry, &abs2->entry); -+ hlsl_block_add_instr(block, abs2); - - if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast2->entry); -+ hlsl_block_add_instr(block, cast2); - - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, cast1, cast2))) - return false; -- list_add_before(&instr->entry, &div->entry); -+ hlsl_block_add_instr(block, div); - - if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast3->entry); -+ hlsl_block_add_instr(block, cast3); - - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) - return false; -- list_add_before(&instr->entry, &neg->entry); -- -- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) -- return false; -- hlsl_replace_node(instr, cond); -+ hlsl_block_add_instr(block, neg); - -- return true; -+ return hlsl_add_conditional(ctx, block, and, neg, cast3); - } - 
--static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -- struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; -+ struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; - struct hlsl_type *type = instr->data_type, *utype; - struct hlsl_constant_value high_bit_value; - struct hlsl_ir_expr *expr; -@@ -2394,45 +2646,41 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - high_bit_value.u[i].u = 0x80000000; - if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) - return false; -- list_add_before(&instr->entry, &high_bit->entry); -+ hlsl_block_add_instr(block, high_bit); - - if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, high_bit))) - return false; -- list_add_before(&instr->entry, &and->entry); -+ hlsl_block_add_instr(block, and); - - if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) - return false; -- list_add_before(&instr->entry, &abs1->entry); -+ hlsl_block_add_instr(block, abs1); - - if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast1->entry); -+ hlsl_block_add_instr(block, cast1); - - if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) - return false; -- list_add_before(&instr->entry, &abs2->entry); -+ hlsl_block_add_instr(block, abs2); - - if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast2->entry); -+ hlsl_block_add_instr(block, cast2); - - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, cast1, cast2))) - return false; -- list_add_before(&instr->entry, &div->entry); -+ hlsl_block_add_instr(block, div); - - if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) 
- return false; -- list_add_before(&instr->entry, &cast3->entry); -+ hlsl_block_add_instr(block, cast3); - - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) - return false; -- list_add_before(&instr->entry, &neg->entry); -+ hlsl_block_add_instr(block, neg); - -- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) -- return false; -- hlsl_replace_node(instr, cond); -- -- return true; -+ return hlsl_add_conditional(ctx, block, and, neg, cast3); - } - - static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -@@ -2516,9 +2764,9 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void - return false; - } - --static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -- struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one; -+ struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one, *mul3; - struct hlsl_type *type = instr->data_type, *btype; - struct hlsl_constant_value one_value; - struct hlsl_ir_expr *expr; -@@ -2539,47 +2787,100 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - - if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) - return false; -- list_add_before(&instr->entry, &mul1->entry); -+ hlsl_block_add_instr(block, mul1); - - if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, &instr->loc))) - return false; -- list_add_before(&instr->entry, &neg1->entry); -+ hlsl_block_add_instr(block, neg1); - - if (!(ge = hlsl_new_binary_expr(ctx, HLSL_OP2_GEQUAL, mul1, neg1))) - return false; - ge->data_type = btype; -- list_add_before(&instr->entry, &ge->entry); -+ hlsl_block_add_instr(block, ge); - - if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, &instr->loc))) - return false; 
-- list_add_before(&instr->entry, &neg2->entry); -+ hlsl_block_add_instr(block, neg2); - -- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, ge, arg2, neg2))) -+ if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2))) - return false; - - for (i = 0; i < type->dimx; ++i) - one_value.u[i].f = 1.0f; - if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) - return false; -- list_add_before(&instr->entry, &one->entry); -+ hlsl_block_add_instr(block, one); - - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, one, cond))) - return false; -- list_add_before(&instr->entry, &div->entry); -+ hlsl_block_add_instr(block, div); - - if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, div, arg1))) - return false; -- list_add_before(&instr->entry, &mul2->entry); -+ hlsl_block_add_instr(block, mul2); - - if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, &instr->loc))) - return false; -- list_add_before(&instr->entry, &frc->entry); -+ hlsl_block_add_instr(block, frc); - -- expr->op = HLSL_OP2_MUL; -- hlsl_src_remove(&expr->operands[0]); -- hlsl_src_remove(&expr->operands[1]); -- hlsl_src_from_node(&expr->operands[0], frc); -- hlsl_src_from_node(&expr->operands[1], cond); -+ if (!(mul3 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, frc, cond))) -+ return false; -+ hlsl_block_add_instr(block, mul3); -+ -+ return true; -+} -+ -+static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; -+ static const struct hlsl_constant_value zero_value; -+ struct hlsl_type *arg_type, *cmp_type; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; -+ struct hlsl_ir_jump *jump; -+ struct hlsl_block block; -+ unsigned int i, count; -+ -+ if (instr->type != HLSL_IR_JUMP) -+ return false; -+ jump = hlsl_ir_jump(instr); -+ if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) -+ return false; -+ -+ hlsl_block_init(&block); -+ -+ arg_type = jump->condition.node->data_type; -+ 
if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(&block, zero); -+ -+ operands[0] = jump->condition.node; -+ operands[1] = zero; -+ cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); -+ if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(&block, cmp); -+ -+ if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(&block, bool_false); -+ -+ or = bool_false; -+ -+ count = hlsl_type_component_count(cmp_type); -+ for (i = 0; i < count; ++i) -+ { -+ if (!(load = hlsl_add_load_component(ctx, &block, cmp, i, &instr->loc))) -+ return false; -+ -+ if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) -+ return NULL; -+ hlsl_block_add_instr(&block, or); -+ } -+ -+ list_move_tail(&instr->entry, &block.instrs); -+ hlsl_src_remove(&jump->condition); -+ hlsl_src_from_node(&jump->condition, or); -+ jump->type = HLSL_IR_JUMP_DISCARD_NZ; - - return true; - } -@@ -2698,7 +2999,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - continue; - regset = hlsl_type_get_regset(var->data_type); - -- if (var->reg_reservation.reg_type && var->regs[regset].bind_count) -+ if (var->reg_reservation.reg_type && var->regs[regset].allocation_size) - { - if (var->reg_reservation.reg_type != get_regset_name(regset)) - { -@@ -2716,7 +3017,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - var->regs[regset].id = var->reg_reservation.reg_index; - TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, - var->reg_reservation.reg_index, var->reg_reservation.reg_type, -- var->reg_reservation.reg_index + var->regs[regset].bind_count); -+ var->reg_reservation.reg_index + var->regs[regset].allocation_size); - } - } - } -@@ -2806,7 +3107,8 @@ 
static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - load->sampler.offset.node->last_read = last_read; - } - -- load->coords.node->last_read = last_read; -+ if (load->coords.node) -+ load->coords.node->last_read = last_read; - if (load->texel_offset.node) - load->texel_offset.node->last_read = last_read; - if (load->lod.node) -@@ -2848,8 +3150,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - index->idx.node->last_read = last_read; - break; - } -- case HLSL_IR_CONSTANT: - case HLSL_IR_JUMP: -+ { -+ struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -+ -+ if (jump->condition.node) -+ jump->condition.node->last_read = last_read; -+ break; -+ } -+ case HLSL_IR_CONSTANT: - break; - } - } -@@ -2966,7 +3275,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a - record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); - - ret.id = reg_idx; -- ret.bind_count = 1; -+ ret.allocation_size = 1; - ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); - ret.allocated = true; - return ret; -@@ -3002,7 +3311,7 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo - record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); - - ret.id = reg_idx; -- ret.bind_count = align(reg_size, 4) / 4; -+ ret.allocation_size = align(reg_size, 4) / 4; - ret.allocated = true; - return ret; - } -@@ -3034,7 +3343,7 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct - return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); - } - --static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - struct hlsl_ir_resource_load *load; - struct hlsl_ir_var *var; -@@ 
-3046,15 +3355,16 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n - - load = hlsl_ir_resource_load(instr); - var = load->resource.var; -+ - regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); -+ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) -+ return false; - - if (regset == HLSL_REGSET_SAMPLERS) - { - enum hlsl_sampler_dim dim; - - assert(!load->sampler.var); -- if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) -- return false; - - dim = var->objects_usage[regset][index].sampler_dim; - if (dim != load->sampling_dim) -@@ -3072,25 +3382,39 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n - return false; - } - } -- var->objects_usage[regset][index].used = true; -- var->objects_usage[regset][index].sampler_dim = load->sampling_dim; - } -- else -- { -- if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) -- return false; -+ var->objects_usage[regset][index].sampler_dim = load->sampling_dim; -+ -+ return false; -+} - -- var->objects_usage[regset][index].used = true; -- var->objects_usage[regset][index].sampler_dim = load->sampling_dim; -+static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_resource_load *load; -+ struct hlsl_ir_var *var; -+ enum hlsl_regset regset; -+ unsigned int index; - -- if (load->sampler.var) -- { -- var = load->sampler.var; -- if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) -- return false; -+ if (instr->type != HLSL_IR_RESOURCE_LOAD) -+ return false; - -- var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; -- } -+ load = hlsl_ir_resource_load(instr); -+ var = load->resource.var; -+ -+ regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); -+ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) -+ return false; -+ -+ 
var->objects_usage[regset][index].used = true; -+ var->bind_count[regset] = max(var->bind_count[regset], index + 1); -+ if (load->sampler.var) -+ { -+ var = load->sampler.var; -+ if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) -+ return false; -+ -+ var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; -+ var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); - } - - return false; -@@ -3100,7 +3424,7 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) - { - struct hlsl_ir_var *var; - struct hlsl_type *type; -- unsigned int i, k; -+ unsigned int k; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -@@ -3108,12 +3432,10 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) - - for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) - { -- for (i = 0; i < type->reg_size[k]; ++i) -- { -- /* Samplers are only allocated until the last used one. */ -- if (var->objects_usage[k][i].used) -- var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; -- } -+ bool is_separated = var->is_separated_resource; -+ -+ if (var->bind_count[k] > 0) -+ var->regs[k].allocation_size = (k == HLSL_REGSET_SAMPLERS || is_separated) ? 
var->bind_count[k] : type->reg_size[k]; - } - } - } -@@ -3192,10 +3514,33 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, - } - } - -+static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f) -+{ -+ struct hlsl_constant_defs *defs = &ctx->constant_defs; -+ struct hlsl_constant_register *reg; -+ size_t i; -+ -+ for (i = 0; i < defs->count; ++i) -+ { -+ reg = &defs->regs[i]; -+ if (reg->index == (component_index / 4)) -+ { -+ reg->value.f[component_index % 4] = f; -+ return; -+ } -+ } -+ -+ if (!hlsl_array_reserve(ctx, (void **)&defs->regs, &defs->size, defs->count + 1, sizeof(*defs->regs))) -+ return; -+ reg = &defs->regs[defs->count++]; -+ memset(reg, 0, sizeof(*reg)); -+ reg->index = component_index / 4; -+ reg->value.f[component_index % 4] = f; -+} -+ - static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - struct hlsl_block *block, struct register_allocator *allocator) - { -- struct hlsl_constant_defs *defs = &ctx->constant_defs; - struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -@@ -3206,66 +3551,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - { - struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - const struct hlsl_type *type = instr->data_type; -- unsigned int x, y, i, writemask, end_reg; -- unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int x, i; - - constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); - -- if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, -- constant->reg.id + reg_size / 4, sizeof(*defs->values))) -- return; -- end_reg = constant->reg.id + reg_size / 4; -- if (end_reg > defs->count) -- { -- memset(&defs->values[defs->count], 0, sizeof(*defs->values) * (end_reg - defs->count)); -- defs->count = end_reg; -- } -- 
- assert(type->class <= HLSL_CLASS_LAST_NUMERIC); -+ assert(type->dimy == 1); -+ assert(constant->reg.writemask); - -- if (!(writemask = constant->reg.writemask)) -- writemask = (1u << type->dimx) - 1; -- -- for (y = 0; y < type->dimy; ++y) -+ for (x = 0, i = 0; x < 4; ++x) - { -- for (x = 0, i = 0; x < 4; ++x) -+ const union hlsl_constant_value_component *value; -+ float f; -+ -+ if (!(constant->reg.writemask & (1u << x))) -+ continue; -+ value = &constant->value.u[i++]; -+ -+ switch (type->base_type) - { -- const union hlsl_constant_value_component *value; -- float f; -- -- if (!(writemask & (1u << x))) -- continue; -- value = &constant->value.u[i++]; -- -- switch (type->base_type) -- { -- case HLSL_TYPE_BOOL: -- f = !!value->u; -- break; -- -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- f = value->f; -- break; -- -- case HLSL_TYPE_INT: -- f = value->i; -- break; -- -- case HLSL_TYPE_UINT: -- f = value->u; -- break; -- -- case HLSL_TYPE_DOUBLE: -- FIXME("Double constant.\n"); -- return; -- -- default: -- vkd3d_unreachable(); -- } -- defs->values[constant->reg.id + y].f[x] = f; -+ case HLSL_TYPE_BOOL: -+ f = !!value->u; -+ break; -+ -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ f = value->f; -+ break; -+ -+ case HLSL_TYPE_INT: -+ f = value->i; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ f = value->u; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ FIXME("Double constant.\n"); -+ return; -+ -+ default: -+ vkd3d_unreachable(); - } -+ -+ record_constant(ctx, constant->reg.id * 4 + x, f); - } - - break; -@@ -3297,8 +3628,6 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - struct register_allocator allocator = {0}; - struct hlsl_ir_var *var; - -- allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); -- - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->last_read) -@@ -3315,6 +3644,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct 
hlsl_ir_functi - } - } - -+ allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); -+ - vkd3d_free(allocator.allocations); - } - -@@ -3410,7 +3741,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - { - var->regs[HLSL_REGSET_NUMERIC].allocated = true; - var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; -- var->regs[HLSL_REGSET_NUMERIC].bind_count = 1; -+ var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; - var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; - TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', - var->regs[HLSL_REGSET_NUMERIC], var->data_type)); -@@ -3497,7 +3828,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) - - LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) -+ if (!var1->is_uniform || hlsl_type_is_resource(var1->data_type)) - continue; - - buffer = var1->buffer; -@@ -3508,7 +3839,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) - { - unsigned int var1_reg_size, var2_reg_size; - -- if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) -+ if (!var2->is_uniform || hlsl_type_is_resource(var2->data_type)) - continue; - - if (var1 == var2 || var1->buffer != var2->buffer) -@@ -3558,7 +3889,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) -+ if (var->is_uniform && !hlsl_type_is_resource(var->data_type)) - { - if (var->is_param) - var->buffer = ctx->params_buffer; -@@ -3589,7 +3920,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - } - - buffer->reg.id = buffer->reservation.reg_index; -- buffer->reg.bind_count = 1; -+ buffer->reg.allocation_size = 1; - buffer->reg.allocated = true; - TRACE("Allocated reserved %s to cb%u.\n", buffer->name, 
index); - } -@@ -3599,7 +3930,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - ++index; - - buffer->reg.id = index; -- buffer->reg.bind_count = 1; -+ buffer->reg.allocation_size = 1; - buffer->reg.allocated = true; - TRACE("Allocated %s to cb%u.\n", buffer->name, index); - ++index; -@@ -3618,7 +3949,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - } - - static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, -- uint32_t index) -+ uint32_t index, bool allocated_only) - { - const struct hlsl_ir_var *var; - unsigned int start, count; -@@ -3632,11 +3963,14 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - * bound there even if the reserved vars aren't used. */ - start = var->reg_reservation.reg_index; - count = var->data_type->reg_size[regset]; -+ -+ if (!var->regs[regset].allocated && allocated_only) -+ continue; - } - else if (var->regs[regset].allocated) - { - start = var->regs[regset].id; -- count = var->regs[regset].bind_count; -+ count = var->regs[regset].allocation_size; - } - else - { -@@ -3667,11 +4001,12 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- unsigned int count = var->regs[regset].bind_count; -+ unsigned int count = var->regs[regset].allocation_size; - - if (count == 0) - continue; - -+ /* The variable was already allocated if it has a reservation. */ - if (var->regs[regset].allocated) - { - const struct hlsl_ir_var *reserved_object, *last_reported = NULL; -@@ -3690,7 +4025,10 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - { - index = var->regs[regset].id + i; - -- reserved_object = get_allocated_object(ctx, regset, index); -+ /* get_allocated_object() may return "var" itself, but we -+ * actually want that, otherwise we'll end up reporting the -+ * same conflict between the same two variables twice. 
*/ -+ reserved_object = get_allocated_object(ctx, regset, index, true); - if (reserved_object && reserved_object != var && reserved_object != last_reported) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, -@@ -3709,7 +4047,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - - while (available < count) - { -- if (get_allocated_object(ctx, regset, index)) -+ if (get_allocated_object(ctx, regset, index, false)) - available = 0; - else - ++available; -@@ -3853,6 +4191,7 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref - bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) - { - struct hlsl_ir_node *offset_node = deref->offset.node; -+ enum hlsl_regset regset; - unsigned int size; - - if (!offset_node) -@@ -3869,8 +4208,9 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref - return false; - - *offset = hlsl_ir_constant(offset_node)->value.u[0].u; -+ regset = hlsl_type_get_regset(deref->data_type); - -- size = deref->var->data_type->reg_size[deref->offset_regset]; -+ size = deref->var->data_type->reg_size[regset]; - if (*offset >= size) - { - hlsl_error(ctx, &deref->offset.node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -@@ -3900,7 +4240,8 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - -- assert(deref->offset_regset == HLSL_REGSET_NUMERIC); -+ assert(deref->data_type); -+ assert(deref->data_type->class <= HLSL_CLASS_LAST_NUMERIC); - - ret.id += offset / 4; - -@@ -4003,12 +4344,13 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - - while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); - -+ lower_ir(ctx, lower_matrix_swizzles, body); - hlsl_transform_ir(ctx, lower_index_loads, body, NULL); 
- - LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) - { - if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -- prepend_uniform_copy(ctx, &body->instrs, var); -+ prepend_uniform_copy(ctx, body, var); - } - - for (i = 0; i < entry_func->parameters.count; ++i) -@@ -4017,7 +4359,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - - if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) - { -- prepend_uniform_copy(ctx, &body->instrs, var); -+ prepend_uniform_copy(ctx, body, var); - } - else - { -@@ -4033,9 +4375,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - } - - if (var->storage_modifiers & HLSL_STORAGE_IN) -- prepend_input_var_copy(ctx, &body->instrs, var); -+ prepend_input_var_copy(ctx, body, var); - if (var->storage_modifiers & HLSL_STORAGE_OUT) -- append_output_var_copy(ctx, &body->instrs, var); -+ append_output_var_copy(ctx, body, var); - } - } - if (entry_func->return_var) -@@ -4044,7 +4386,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); - -- append_output_var_copy(ctx, &body->instrs, entry_func->return_var); -+ append_output_var_copy(ctx, body, entry_func->return_var); - } - - for (i = 0; i < entry_func->attr_count; ++i) -@@ -4062,6 +4404,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, - "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); - -+ if (profile->major_version >= 4) -+ { -+ hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); -+ } - hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); - while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); - do 
-@@ -4075,10 +4421,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, lower_narrowing_casts, body, NULL); - hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); - hlsl_transform_ir(ctx, lower_int_dot, body, NULL); -- hlsl_transform_ir(ctx, lower_int_division, body, NULL); -- hlsl_transform_ir(ctx, lower_int_modulus, body, NULL); -+ lower_ir(ctx, lower_int_division, body); -+ lower_ir(ctx, lower_int_modulus, body); - hlsl_transform_ir(ctx, lower_int_abs, body, NULL); -- hlsl_transform_ir(ctx, lower_float_modulus, body, NULL); -+ lower_ir(ctx, lower_float_modulus, body); - hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); - do - { -@@ -4094,6 +4440,15 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); - hlsl_transform_ir(ctx, lower_int_dot, body, NULL); - -+ hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); -+ hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); -+ if (profile->major_version >= 4) -+ hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); -+ hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); -+ sort_synthetic_separated_samplers_first(ctx); -+ -+ if (profile->major_version >= 4) -+ hlsl_transform_ir(ctx, lower_ternary, body, NULL); - if (profile->major_version < 4) - { - hlsl_transform_ir(ctx, lower_division, body, NULL); -@@ -4107,9 +4462,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, lower_abs, body, NULL); - } - -- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); -- hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); -- - /* TODO: move forward, remove when no longer needed */ - transform_derefs(ctx, replace_deref_path_with_offset, body); - while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); -diff --git 
a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 301113c8477..41a72ab6c0d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -80,7 +80,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - return false; - } - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (src->node.data_type->base_type) - { -@@ -152,6 +152,51 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - return true; - } - -+static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, -+ "Indefinite logarithm result."); -+ } -+ dst->u[k].f = log2f(src->value.u[k].f); -+ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, -+ "Infinities and NaNs are not allowed by the shader model."); -+ } -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ if (src->value.u[k].d < 0.0) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, -+ "Indefinite logarithm result."); -+ } -+ dst->u[k].d = log2(src->value.u[k].d); -+ break; -+ -+ default: -+ FIXME("Fold 'log2' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - const struct hlsl_type *dst_type, const struct 
hlsl_ir_constant *src) - { -@@ -160,7 +205,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - - assert(type == src->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { -@@ -186,6 +231,96 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - return true; - } - -+static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (ctx->profile->major_version >= 4 && src->value.u[k].f == 0.0f) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, -+ "Floating point division by zero."); -+ } -+ dst->u[k].f = 1.0f / src->value.u[k].f; -+ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, -+ "Infinities and NaNs are not allowed by the shader model."); -+ } -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ if (src->value.u[k].d == 0.0) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, -+ "Floating point division by zero."); -+ } -+ dst->u[k].d = 1.0 / src->value.u[k].d; -+ break; -+ -+ default: -+ FIXME("Fold 'rcp' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ -+static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src->node.data_type->base_type); -+ -+ for (k = 0; k < 
dst_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, -+ "Imaginary square root result."); -+ } -+ dst->u[k].f = sqrtf(src->value.u[k].f); -+ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, -+ "Infinities and NaNs are not allowed by the shader model."); -+ } -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ if (src->value.u[k].d < 0.0) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, -+ "Imaginary square root result."); -+ } -+ dst->u[k].d = sqrt(src->value.u[k].d); -+ break; -+ -+ default: -+ FIXME("Fold 'sqrt' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -@@ -195,7 +330,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { -@@ -223,7 +358,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - return true; - } - --static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { - enum hlsl_base_type type = dst_type->base_type; -@@ -232,65 +367,132 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons 
- assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; - break; - -- case HLSL_TYPE_DOUBLE: -- dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; -+ default: -+ FIXME("Fold bit/logic and for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ return true; -+} -+ -+static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; - break; - -+ default: -+ FIXME("Fold bit/logic or for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ return true; -+} -+ -+static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; -+ dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; - break; 
- - default: -- FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); -+ assert(src1->node.data_type->dimx == src2->node.data_type->dimx); - -- for (k = 0; k < 4; ++k) -+ dst->u[0].f = 0.0f; -+ for (k = 0; k < src1->node.data_type->dimx; ++k) - { -- switch (src1->node.data_type->base_type) -+ switch (type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -- dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; -+ dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; - break; -+ default: -+ FIXME("Fold 'dot' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } - -- case HLSL_TYPE_DOUBLE: -- dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; -- break; -+ return true; -+} - -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- case HLSL_TYPE_BOOL: -- dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; -- break; -+static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; - -+ assert(type == src1->node.data_type->base_type); -+ assert(type == 
src2->node.data_type->base_type); -+ assert(type == src3->node.data_type->base_type); -+ assert(src1->node.data_type->dimx == src2->node.data_type->dimx); -+ assert(src3->node.data_type->dimx == 1); -+ -+ dst->u[0].f = src3->value.u[0].f; -+ for (k = 0; k < src1->node.data_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; -+ break; - default: -- vkd3d_unreachable(); -+ FIXME("Fold 'dp2add' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; - } -- -- dst->u[k].u *= ~0u; - } -+ - return true; - } - -@@ -363,45 +565,116 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - return true; - } - --static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -- const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, -- const struct vkd3d_shader_location *loc) -+static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (type) -+ switch (src1->node.data_type->base_type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f == src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d == src2->value.u[k].d; -+ break; -+ - case HLSL_TYPE_INT: -- if (src2->value.u[k].i == 0) -- { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -- return false; -- } -- if 
(src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) -- dst->u[k].i = 0; -- else -- dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ dst->u[k].u *= ~0u; -+ } -+ return true; -+} -+ -+static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ unsigned int k; -+ -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (src1->node.data_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f >= src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d >= src2->value.u[k].d; -+ break; -+ -+ case HLSL_TYPE_INT: -+ dst->u[k].u = src1->value.u[k].i >= src2->value.u[k].i; - break; - - case HLSL_TYPE_UINT: -- if (src2->value.u[k].u == 0) -- { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -- return false; -- } -- dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; - break; - - default: -- FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); -- return false; -+ vkd3d_unreachable(); - } -+ -+ dst->u[k].u *= ~0u; -+ } -+ return true; -+} -+ -+static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ unsigned int k; -+ -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < 
dst_type->dimx; ++k) -+ { -+ switch (src1->node.data_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f < src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d < src2->value.u[k].d; -+ break; -+ -+ case HLSL_TYPE_INT: -+ dst->u[k].u = src1->value.u[k].i < src2->value.u[k].i; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ dst->u[k].u *= ~0u; - } - return true; - } -@@ -419,6 +692,15 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - { - switch (type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = fmaxf(src1->value.u[k].f, src2->value.u[k].f); -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = fmax(src1->value.u[k].d, src2->value.u[k].d); -+ break; -+ - case HLSL_TYPE_INT: - dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); - break; -@@ -448,6 +730,15 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - { - switch (type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = fminf(src1->value.u[k].f, src2->value.u[k].f); -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = fmin(src1->value.u[k].d, src2->value.u[k].d); -+ break; -+ - case HLSL_TYPE_INT: - dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); - break; -@@ -464,8 +755,9 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - return true; - } - --static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -- const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, -+ 
const struct vkd3d_shader_location *loc) - { - enum hlsl_base_type type = dst_type->base_type; - unsigned int k; -@@ -478,19 +770,35 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - switch (type) - { - case HLSL_TYPE_INT: -+ if (src2->value.u[k].i == 0) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -+ return false; -+ } -+ if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) -+ dst->u[k].i = 0; -+ else -+ dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; -+ break; -+ - case HLSL_TYPE_UINT: -- dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; -+ if (src2->value.u[k].u == 0) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -+ return false; -+ } -+ dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; - break; - - default: -- FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { - enum hlsl_base_type type = dst_type->base_type; -@@ -503,48 +811,67 @@ static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - { - switch (type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; -+ break; -+ - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; -+ dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; - break; - - default: -- FIXME("Fold bit and for type %s.\n", 
debug_hlsl_type(ctx, dst_type)); -+ FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); - - for (k = 0; k < dst_type->dimx; ++k) - { -- switch (type) -+ switch (src1->node.data_type->base_type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; -+ break; -+ - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; - break; - - default: -- FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst_type)); -- return false; -+ vkd3d_unreachable(); - } -+ -+ dst->u[k].u *= ~0u; - } - return true; - } - - bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -- struct hlsl_ir_constant *arg1, *arg2 = NULL; -+ struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; - struct hlsl_constant_value res = {0}; - struct hlsl_ir_node *res_node; - struct hlsl_ir_expr *expr; -@@ -572,6 +899,8 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - arg1 = hlsl_ir_constant(expr->operands[0].node); - if (expr->operands[1].node) - arg2 = 
hlsl_ir_constant(expr->operands[1].node); -+ if (expr->operands[2].node) -+ arg3 = hlsl_ir_constant(expr->operands[2].node); - - switch (expr->op) - { -@@ -583,28 +912,58 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - success = fold_cast(ctx, &res, instr->data_type, arg1); - break; - -+ case HLSL_OP1_LOG2: -+ success = fold_log2(ctx, &res, instr->data_type, arg1, &instr->loc); -+ break; -+ - case HLSL_OP1_NEG: - success = fold_neg(ctx, &res, instr->data_type, arg1); - break; - -+ case HLSL_OP1_RCP: -+ success = fold_rcp(ctx, &res, instr->data_type, arg1, &instr->loc); -+ break; -+ -+ case HLSL_OP1_SQRT: -+ success = fold_sqrt(ctx, &res, instr->data_type, arg1, &instr->loc); -+ break; -+ - case HLSL_OP2_ADD: - success = fold_add(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_MUL: -- success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_BIT_AND: -+ case HLSL_OP2_LOGIC_AND: -+ success = fold_and(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_NEQUAL: -- success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_BIT_OR: -+ case HLSL_OP2_LOGIC_OR: -+ success = fold_or(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_BIT_XOR: -+ success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_DOT: -+ success = fold_dot(ctx, &res, instr->data_type, arg1, arg2); - break; - - case HLSL_OP2_DIV: - success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); - break; - -- case HLSL_OP2_MOD: -- success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); -+ case HLSL_OP2_EQUAL: -+ success = fold_equal(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_GEQUAL: -+ success = fold_gequal(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_LESS: -+ success = fold_less(ctx, &res, instr->data_type, arg1, arg2); - break; - - case 
HLSL_OP2_MAX: -@@ -615,16 +974,20 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - success = fold_min(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_BIT_XOR: -- success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_MOD: -+ success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); - break; - -- case HLSL_OP2_BIT_AND: -- success = fold_bit_and(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_MUL: -+ success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_BIT_OR: -- success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); -+ case HLSL_OP2_NEQUAL: -+ success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP3_DP2ADD: -+ success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); - break; - - default: -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 9eefb82c226..6d7c89653e3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -85,6 +85,72 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i - shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); - } - -+static const struct vkd3d_shader_varying_map *find_varying_map( -+ const struct vkd3d_shader_next_stage_info *next_stage, unsigned int signature_idx) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < next_stage->varying_count; ++i) -+ { -+ if (next_stage->varying_map[i].output_signature_index == signature_idx) -+ return &next_stage->varying_map[i]; -+ } -+ -+ return NULL; -+} -+ -+static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_compile_info *compile_info) -+{ -+ struct shader_signature *signature = &parser->shader_desc.output_signature; -+ const struct vkd3d_shader_next_stage_info *next_stage; -+ unsigned 
int i; -+ -+ if (!(next_stage = vkd3d_find_struct(compile_info->next, NEXT_STAGE_INFO))) -+ return VKD3D_OK; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ const struct vkd3d_shader_varying_map *map = find_varying_map(next_stage, i); -+ struct signature_element *e = &signature->elements[i]; -+ -+ if (map) -+ { -+ unsigned int input_mask = map->input_mask; -+ -+ e->target_location = map->input_register_index; -+ -+ /* It is illegal in Vulkan if the next shader uses the same varying -+ * location with a different mask. */ -+ if (input_mask && input_mask != e->mask) -+ { -+ vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Aborting due to not yet implemented feature: " -+ "Output mask %#x does not match input mask %#x.", -+ e->mask, input_mask); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ } -+ else -+ { -+ e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; -+ } -+ } -+ -+ for (i = 0; i < next_stage->varying_count; ++i) -+ { -+ if (next_stage->varying_map[i].output_signature_index >= signature->element_count) -+ { -+ vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Aborting due to not yet implemented feature: " -+ "The next stage consumes varyings not written by this stage."); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ - struct hull_flattener - { - struct vkd3d_shader_instruction_array instructions; -@@ -230,7 +296,7 @@ static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normali - return VKD3D_OK; - } - --static void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, -+void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, - enum vkd3d_data_type data_type, unsigned int idx_count) - { - reg->type = reg_type; -@@ -247,13 +313,13 @@ static void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_s - reg->immconst_type = 
VKD3D_IMMCONST_SCALAR; - } - --static void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) -+void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) - { - memset(ins, 0, sizeof(*ins)); - ins->handler_idx = handler_idx; - } - --enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) -+static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) - { - struct hull_flattener flattener = {*src_instructions}; - struct vkd3d_shader_instruction_array *instructions; -@@ -388,7 +454,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p - return VKD3D_OK; - } - --enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( -+static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( - struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) - { - struct vkd3d_shader_instruction_array *instructions; -@@ -999,7 +1065,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - shader_instruction_init(ins, VKD3DSIH_NOP); - } - --enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, -+static enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, - enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, - struct shader_signature *output_signature, struct shader_signature *patch_constant_signature) - { -@@ -1070,3 +1136,159 @@ enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_i - *instructions = normaliser.instructions; - return VKD3D_OK; - } -+ -+struct flat_constant_def -+{ -+ enum vkd3d_shader_d3dbc_constant_register set; -+ uint32_t index; -+ 
uint32_t value[4]; -+}; -+ -+struct flat_constants_normaliser -+{ -+ struct vkd3d_shader_parser *parser; -+ struct flat_constant_def *defs; -+ size_t def_count, defs_capacity; -+}; -+ -+static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg, -+ enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index) -+{ -+ static const struct -+ { -+ enum vkd3d_shader_register_type type; -+ enum vkd3d_shader_d3dbc_constant_register set; -+ uint32_t offset; -+ } -+ regs[] = -+ { -+ {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0}, -+ {VKD3DSPR_CONST2, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048}, -+ {VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096}, -+ {VKD3DSPR_CONST4, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144}, -+ {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0}, -+ {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0}, -+ }; -+ -+ unsigned int i; -+ -+ for (i = 0; i < ARRAY_SIZE(regs); ++i) -+ { -+ if (reg->type == regs[i].type) -+ { -+ if (reg->idx[0].rel_addr) -+ { -+ FIXME("Unhandled relative address.\n"); -+ return false; -+ } -+ -+ *set = regs[i].set; -+ *index = regs[i].offset + reg->idx[0].offset; -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_param *param, -+ const struct flat_constants_normaliser *normaliser) -+{ -+ enum vkd3d_shader_d3dbc_constant_register set; -+ uint32_t index; -+ size_t i, j; -+ -+ if (!get_flat_constant_register_type(¶m->reg, &set, &index)) -+ return; -+ -+ for (i = 0; i < normaliser->def_count; ++i) -+ { -+ if (normaliser->defs[i].set == set && normaliser->defs[i].index == index) -+ { -+ param->reg.type = VKD3DSPR_IMMCONST; -+ param->reg.idx_count = 0; -+ param->reg.immconst_type = VKD3D_IMMCONST_VEC4; -+ for (j = 0; j < 4; ++j) -+ param->reg.u.immconst_uint[j] = normaliser->defs[i].value[j]; -+ return; -+ } -+ } -+ -+ param->reg.type = 
VKD3DSPR_CONSTBUFFER; -+ param->reg.idx[0].offset = set; /* register ID */ -+ param->reg.idx[1].offset = set; /* register index */ -+ param->reg.idx[2].offset = index; /* buffer index */ -+ param->reg.idx_count = 3; -+} -+ -+static enum vkd3d_result instruction_array_normalise_flat_constants(struct vkd3d_shader_parser *parser) -+{ -+ struct flat_constants_normaliser normaliser = {.parser = parser}; -+ unsigned int i, j; -+ -+ for (i = 0; i < parser->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &parser->instructions.elements[i]; -+ -+ if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) -+ { -+ struct flat_constant_def *def; -+ -+ if (!vkd3d_array_reserve((void **)&normaliser.defs, &normaliser.defs_capacity, -+ normaliser.def_count + 1, sizeof(*normaliser.defs))) -+ { -+ vkd3d_free(normaliser.defs); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ def = &normaliser.defs[normaliser.def_count++]; -+ -+ get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index); -+ for (j = 0; j < 4; ++j) -+ def->value[j] = ins->src[0].reg.u.immconst_uint[j]; -+ -+ vkd3d_shader_instruction_make_nop(ins); -+ } -+ else -+ { -+ for (j = 0; j < ins->src_count; ++j) -+ shader_register_normalise_flat_constants((struct vkd3d_shader_src_param *)&ins->src[j], &normaliser); -+ } -+ } -+ -+ vkd3d_free(normaliser.defs); -+ return VKD3D_OK; -+} -+ -+enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_compile_info *compile_info) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &parser->instructions; -+ enum vkd3d_result result = VKD3D_OK; -+ -+ if (parser->shader_desc.is_dxil) -+ return result; -+ -+ if (parser->shader_version.type != VKD3D_SHADER_TYPE_PIXEL -+ && (result = remap_output_signature(parser, compile_info)) < 0) -+ return result; -+ -+ if (parser->shader_version.type == VKD3D_SHADER_TYPE_HULL 
-+ && (result = instruction_array_flatten_hull_shader_phases(instructions)) >= 0) -+ { -+ result = instruction_array_normalise_hull_shader_control_point_io(instructions, -+ &parser->shader_desc.input_signature); -+ } -+ if (result >= 0) -+ result = instruction_array_normalise_io_registers(instructions, parser->shader_version.type, -+ &parser->shader_desc.input_signature, &parser->shader_desc.output_signature, -+ &parser->shader_desc.patch_constant_signature); -+ -+ if (result >= 0) -+ result = instruction_array_normalise_flat_constants(parser); -+ -+ if (result >= 0 && TRACE_ON()) -+ vkd3d_shader_trace(instructions, &parser->shader_version); -+ -+ return result; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index 94079696280..6fb61eff6c3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.l -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l -@@ -30,6 +30,13 @@ - - #define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner) - -+static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) -+{ -+ if (!ctx->expansion_count) -+ return NULL; -+ return ctx->expansion_stack[ctx->expansion_count - 1].macro; -+} -+ - static void update_location(struct preproc_ctx *ctx); - - #define YY_USER_ACTION update_location(yyget_extra(yyscanner)); -@@ -125,7 +132,20 @@ INT_SUFFIX [uUlL]{0,2} - const char *p; - - if (!ctx->last_was_newline) -- return T_HASHSTRING; -+ { -+ struct preproc_macro *macro; -+ -+ /* Stringification is only done for function-like macro bodies. -+ * Anywhere else, we need to parse it as two separate tokens. -+ * We could use a state for this, but yyless() is easier and cheap. 
-+ */ -+ -+ if ((macro = preproc_get_top_macro(ctx)) && macro->arg_count) -+ return T_HASHSTRING; -+ -+ yyless(1); -+ return T_TEXT; -+ } - - for (p = yytext + 1; strchr(" \t", *p); ++p) - ; -@@ -219,13 +239,6 @@ static bool preproc_is_writing(struct preproc_ctx *ctx) - return file->if_stack[file->if_count - 1].current_true; - } - --static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) --{ -- if (!ctx->expansion_count) -- return NULL; -- return ctx->expansion_stack[ctx->expansion_count - 1].macro; --} -- - /* Concatenation is not done for object-like macros, but is done for both - * function-like macro bodies and their arguments. */ - static bool should_concat(struct preproc_ctx *ctx) -@@ -334,6 +347,43 @@ static bool preproc_push_expansion(struct preproc_ctx *ctx, - return true; - } - -+static void preproc_stringify(struct preproc_ctx *ctx, struct vkd3d_string_buffer *buffer, const char *text) -+{ -+ const struct preproc_text *expansion; -+ const char *p = text + 1; -+ unsigned int i; -+ -+ while (*p == ' ' || *p == '\t') -+ ++p; -+ -+ vkd3d_string_buffer_printf(buffer, "\""); -+ if ((expansion = find_arg_expansion(ctx, p))) -+ { -+ size_t len = expansion->text.content_size; -+ size_t start = 0; -+ -+ while (len && strchr(" \t\r\n", expansion->text.buffer[len - 1])) -+ --len; -+ -+ while (start < len && strchr(" \t\r\n", expansion->text.buffer[start])) -+ ++start; -+ -+ for (i = start; i < len; ++i) -+ { -+ char c = expansion->text.buffer[i]; -+ -+ if (c == '\\' || c == '"') -+ vkd3d_string_buffer_printf(buffer, "\\"); -+ vkd3d_string_buffer_printf(buffer, "%c", c); -+ } -+ } -+ else -+ { -+ vkd3d_string_buffer_printf(buffer, "%s", p); -+ } -+ vkd3d_string_buffer_printf(buffer, "\""); -+} -+ - int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - { - struct preproc_ctx *ctx = yyget_extra(scanner); -@@ -441,9 +491,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - switch (func_state->state) - { - case STATE_NONE: -- { -- 
struct preproc_macro *macro; -- - if (token == T_CONCAT && should_concat(ctx)) - { - while (ctx->buffer.content_size -@@ -452,37 +499,17 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - break; - } - -- /* Stringification, however, is only done for function-like -- * macro bodies. */ -- if (token == T_HASHSTRING && (macro = preproc_get_top_macro(ctx)) && macro->arg_count) -+ if (token == T_HASHSTRING) - { -- const struct preproc_text *expansion; -- const char *p = text + 1; -- unsigned int i; -+ struct vkd3d_string_buffer buffer; - - if (ctx->current_directive) - return return_token(token, lval, text); - -- while (*p == ' ' || *p == '\t') -- ++p; -- -- vkd3d_string_buffer_printf(&ctx->buffer, "\""); -- if ((expansion = find_arg_expansion(ctx, p))) -- { -- for (i = 0; i < expansion->text.content_size; ++i) -- { -- char c = expansion->text.buffer[i]; -- -- if (c == '\\' || c == '"') -- vkd3d_string_buffer_printf(&ctx->buffer, "\\"); -- vkd3d_string_buffer_printf(&ctx->buffer, "%c", c); -- } -- } -- else -- { -- vkd3d_string_buffer_printf(&ctx->buffer, "%s", p); -- } -- vkd3d_string_buffer_printf(&ctx->buffer, "\""); -+ vkd3d_string_buffer_init(&buffer); -+ preproc_stringify(ctx, &buffer, text); -+ vkd3d_string_buffer_printf(&ctx->buffer, "%s", buffer.buffer); -+ vkd3d_string_buffer_cleanup(&buffer); - break; - } - -@@ -586,7 +613,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - else - vkd3d_string_buffer_printf(&ctx->buffer, "%s ", text); - break; -- } - - case STATE_IDENTIFIER: - if (token == '(') -@@ -628,6 +654,41 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - - switch (token) - { -+ /* Most text gets left alone (e.g. if it contains macros, -+ * the macros should be evaluated later). -+ * Arguments are a special case, and are replaced with -+ * their values immediately. 
*/ -+ case T_IDENTIFIER: -+ case T_IDENTIFIER_PAREN: -+ { -+ const struct preproc_text *expansion; -+ -+ if ((expansion = find_arg_expansion(ctx, text))) -+ { -+ preproc_push_expansion(ctx, expansion, NULL); -+ continue; -+ } -+ -+ if (current_arg) -+ preproc_text_add(current_arg, text); -+ break; -+ } -+ -+ /* Stringification is another special case. Unsurprisingly, -+ * we need to stringify if this is an argument. More -+ * surprisingly, we need to stringify even if it's not. */ -+ case T_HASHSTRING: -+ { -+ struct vkd3d_string_buffer buffer; -+ -+ vkd3d_string_buffer_init(&buffer); -+ preproc_stringify(ctx, &buffer, text); -+ if (current_arg) -+ preproc_text_add(current_arg, buffer.buffer); -+ vkd3d_string_buffer_cleanup(&buffer); -+ break; -+ } -+ - case T_NEWLINE: - if (current_arg) - preproc_text_add(current_arg, " "); -@@ -686,6 +747,9 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - if (current_arg) - preproc_text_add(current_arg, text); - } -+ -+ if (current_arg) -+ preproc_text_add(current_arg, " "); - break; - } - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 3542b5fac51..95f6914acb7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -199,6 +199,16 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d - } - } - -+static inline bool register_is_undef(const struct vkd3d_shader_register *reg) -+{ -+ return reg->type == VKD3DSPR_UNDEF; -+} -+ -+static inline bool register_is_constant_or_undef(const struct vkd3d_shader_register *reg) -+{ -+ return register_is_constant(reg) || register_is_undef(reg); -+} -+ - #define VKD3D_SPIRV_VERSION 0x00010000 - #define VKD3D_SPIRV_GENERATOR_ID 18 - #define VKD3D_SPIRV_GENERATOR_VERSION 8 -@@ -1746,6 +1756,38 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, - } - } - -+static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder 
*builder, -+ enum vkd3d_data_type data_type, unsigned int component_count) -+{ -+ uint32_t scalar_id; -+ -+ if (component_count == 1) -+ { -+ switch (data_type) -+ { -+ case VKD3D_DATA_FLOAT: -+ case VKD3D_DATA_SNORM: -+ case VKD3D_DATA_UNORM: -+ return vkd3d_spirv_get_op_type_float(builder, 32); -+ break; -+ case VKD3D_DATA_INT: -+ case VKD3D_DATA_UINT: -+ return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); -+ break; -+ case VKD3D_DATA_DOUBLE: -+ return vkd3d_spirv_get_op_type_float(builder, 64); -+ default: -+ FIXME("Unhandled data type %#x.\n", data_type); -+ return 0; -+ } -+ } -+ else -+ { -+ scalar_id = vkd3d_spirv_get_type_id_for_data_type(builder, data_type, 1); -+ return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); -+ } -+} -+ - static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point) - { - vkd3d_spirv_stream_init(&builder->debug_stream); -@@ -2263,7 +2305,7 @@ struct spirv_compiler - - uint32_t binding_idx; - -- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; - unsigned int input_control_point_count; - unsigned int output_control_point_count; - bool use_vocp; -@@ -2333,7 +2375,7 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) - - static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, - struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, -- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, -+ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) - { - const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; -@@ -2429,13 +2471,6 @@ static struct spirv_compiler 
*spirv_compiler_create(const struct vkd3d_shader_ve - - compiler->shader_type = shader_version->type; - -- compiler->input_signature = shader_desc->input_signature; -- compiler->output_signature = shader_desc->output_signature; -- compiler->patch_constant_signature = shader_desc->patch_constant_signature; -- memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); -- memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); -- memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); -- - if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) - { - compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); -@@ -2536,13 +2571,13 @@ static bool spirv_compiler_check_shader_visibility(const struct spirv_compiler * - } - - static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_constant_buffer( -- const struct spirv_compiler *compiler, const struct vkd3d_shader_constant_buffer *cb) -+ const struct spirv_compiler *compiler, const struct vkd3d_shader_register_range *range) - { -- unsigned int register_space = cb->range.space; -- unsigned int reg_idx = cb->range.first; -+ unsigned int register_space = range->space; -+ unsigned int reg_idx = range->first; - unsigned int i; - -- if (cb->range.first != cb->range.last) -+ if (range->first != range->last) - return NULL; - - for (i = 0; i < compiler->shader_interface.push_constant_buffer_count; ++i) -@@ -2559,8 +2594,8 @@ static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_const - return NULL; - } - --static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *compiler, -- const struct vkd3d_shader_resource *resource, const struct vkd3d_shader_sampler *sampler) -+static bool spirv_compiler_has_combined_sampler_for_resource(const struct spirv_compiler *compiler, -+ const struct vkd3d_shader_register_range *range) - { - const struct 
vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; - const struct vkd3d_shader_combined_resource_sampler *combined_sampler; -@@ -2569,10 +2604,35 @@ static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *com - if (!shader_interface->combined_sampler_count) - return false; - -- if (resource && (resource->reg.reg.type == VKD3DSPR_UAV || resource->range.last != resource->range.first)) -+ if (range->last != range->first) - return false; - -- if (sampler && sampler->range.first != sampler->range.last) -+ for (i = 0; i < shader_interface->combined_sampler_count; ++i) -+ { -+ combined_sampler = &shader_interface->combined_samplers[i]; -+ -+ if (!spirv_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) -+ continue; -+ -+ if ((combined_sampler->resource_space == range->space -+ && combined_sampler->resource_index == range->first)) -+ return true; -+ } -+ -+ return false; -+} -+ -+static bool spirv_compiler_has_combined_sampler_for_sampler(const struct spirv_compiler *compiler, -+ const struct vkd3d_shader_register_range *range) -+{ -+ const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; -+ const struct vkd3d_shader_combined_resource_sampler *combined_sampler; -+ unsigned int i; -+ -+ if (!shader_interface->combined_sampler_count) -+ return false; -+ -+ if (range->last != range->first) - return false; - - for (i = 0; i < shader_interface->combined_sampler_count; ++i) -@@ -2582,10 +2642,8 @@ static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *com - if (!spirv_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) - continue; - -- if ((!resource || (combined_sampler->resource_space == resource->range.space -- && combined_sampler->resource_index == resource->range.first)) -- && (!sampler || (combined_sampler->sampler_space == sampler->range.space -- && combined_sampler->sampler_index == sampler->range.first))) -+ 
if (combined_sampler->sampler_space == range->space -+ && combined_sampler->sampler_index == range->first) - return true; - } - -@@ -2603,6 +2661,16 @@ static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_error(struct spirv_compiler * - compiler->failed = true; - } - -+static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_warning(struct spirv_compiler *compiler, -+ enum vkd3d_shader_error error, const char *format, ...) -+{ -+ va_list args; -+ -+ va_start(args, format); -+ vkd3d_shader_vwarning(compiler->message_context, &compiler->location, error, format, args); -+ va_end(args); -+} -+ - static struct vkd3d_string_buffer *vkd3d_shader_register_range_string(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range) - { -@@ -3211,13 +3279,13 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil - struct vkd3d_symbol reg_symbol, *symbol; - struct rb_entry *entry; - -- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); -+ assert(!register_is_constant_or_undef(reg)); - - if (reg->type == VKD3DSPR_TEMP) - { - assert(reg->idx[0].offset < compiler->temp_count); - register_info->id = compiler->temp_id + reg->idx[0].offset; -- register_info->storage_class = SpvStorageClassFunction; -+ register_info->storage_class = SpvStorageClassPrivate; - register_info->descriptor_array = NULL; - register_info->member_idx = 0; - register_info->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -@@ -3553,6 +3621,19 @@ static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi - vkd3d_component_type_from_data_type(reg->data_type), component_count, values); - } - -+static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_register *reg, DWORD write_mask) -+{ -+ unsigned int component_count = vkd3d_write_mask_component_count(write_mask); -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ uint32_t type_id; -+ -+ assert(reg->type == 
VKD3DSPR_UNDEF); -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); -+ return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); -+} -+ - static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask, - const struct vkd3d_shader_register_info *reg_info) -@@ -3563,7 +3644,7 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, - enum vkd3d_shader_component_type component_type; - unsigned int skipped_component_mask; - -- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); -+ assert(!register_is_constant_or_undef(reg)); - assert(vkd3d_write_mask_component_count(write_mask) == 1); - - component_idx = vkd3d_write_mask_get_component_idx(write_mask); -@@ -3615,6 +3696,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); - else if (reg->type == VKD3DSPR_IMMCONST64) - return spirv_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); -+ else if (reg->type == VKD3DSPR_UNDEF) -+ return spirv_compiler_emit_load_undef(compiler, reg, write_mask); - - component_count = vkd3d_write_mask_component_count(write_mask); - component_type = vkd3d_component_type_from_data_type(reg->data_type); -@@ -3827,7 +3910,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, - unsigned int src_write_mask = write_mask; - uint32_t type_id; - -- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); -+ assert(!register_is_constant_or_undef(reg)); - - if (!spirv_compiler_get_register_info(compiler, reg, &reg_info)) - return; -@@ -3998,6 +4081,11 @@ static void spirv_compiler_emit_interpolation_decorations(struct spirv_compiler - vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); - 
vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); - break; -+ case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: -+ vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationNoPerspective, NULL, 0); -+ vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); -+ vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); -+ break; - default: - FIXME("Unhandled interpolation mode %#x.\n", mode); - break; -@@ -4542,7 +4630,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, - } - else - { -- unsigned int location = signature_element->register_index; -+ unsigned int location = signature_element->target_location; - - input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - storage_class, component_type, input_component_count, array_sizes, 2); -@@ -4704,13 +4792,16 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) - - static void calculate_clip_or_cull_distance_mask(const struct signature_element *e, uint32_t *mask) - { -+ unsigned int write_mask; -+ - if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) - { - FIXME("Invalid semantic index %u for clip/cull distance.\n", e->semantic_index); - return; - } - -- *mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); -+ write_mask = e->mask >> vkd3d_write_mask_get_component_idx(e->mask); -+ *mask |= (write_mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); - } - - /* Emits arrayed SPIR-V built-in variables. 
*/ -@@ -4874,7 +4965,6 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - component_type = builtin->component_type; - if (!builtin->spirv_array_size) - output_component_count = builtin->component_count; -- component_idx = 0; - } - else - { -@@ -4888,14 +4978,9 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - || needs_private_io_variable(builtin)) - { - use_private_variable = true; -- reg_write_mask = write_mask; -- } -- else -- { -- component_idx = vkd3d_write_mask_get_component_idx(write_mask); -- reg_write_mask = write_mask >> component_idx; - } - -+ reg_write_mask = write_mask >> component_idx; - vkd3d_symbol_make_register(&reg_symbol, reg); - - if (rb_get(&compiler->symbol_table, &reg_symbol)) -@@ -4918,9 +5003,15 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - - spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); - } -+ else if (signature_element->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ { -+ storage_class = SpvStorageClassPrivate; -+ id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, -+ storage_class, component_type, output_component_count, array_sizes, 2); -+ } - else - { -- unsigned int location = signature_element->register_index; -+ unsigned int location = signature_element->target_location; - - if (is_patch_constant) - location += shader_signature_next_location(&compiler->output_signature); -@@ -4929,10 +5020,10 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - storage_class, component_type, output_component_count, array_sizes, 2); - vkd3d_spirv_add_iface_variable(builder, id); - -- if (is_dual_source_blending(compiler) && signature_element->register_index < 2) -+ if (is_dual_source_blending(compiler) && location < 2) - { - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); -- vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, signature_element->register_index); -+ 
vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, location); - } - else - { -@@ -5258,8 +5349,7 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler - WARN("Unhandled global flags %#x.\n", flags); - } - --static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t count) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - size_t function_location; -@@ -5270,11 +5360,11 @@ static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, - vkd3d_spirv_begin_function_stream_insertion(builder, function_location); - - assert(!compiler->temp_count); -- compiler->temp_count = instruction->declaration.count; -+ compiler->temp_count = count; - for (i = 0; i < compiler->temp_count; ++i) - { -- id = spirv_compiler_emit_variable(compiler, &builder->function_stream, -- SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); -+ id = spirv_compiler_emit_variable(compiler, &builder->global_stream, -+ SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); - if (!i) - compiler->temp_id = id; - assert(id == compiler->temp_id + i); -@@ -5473,50 +5563,55 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * - return var_id; - } - --static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_register_range *range, unsigned int register_id, unsigned int size_in_bytes) - { -- const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; - const SpvStorageClass 
storage_class = SpvStorageClassUniform; -- const struct vkd3d_shader_register *reg = &cb->src.reg; - struct vkd3d_push_constant_buffer_binding *push_cb; - struct vkd3d_descriptor_variable_info var_info; - struct vkd3d_symbol reg_symbol; -+ unsigned int size; -+ -+ struct vkd3d_shader_register reg = -+ { -+ .type = VKD3DSPR_CONSTBUFFER, -+ .idx[0].offset = register_id, -+ .idx_count = 1, -+ }; - -- assert(!(instruction->flags & ~VKD3DSI_INDEXED_DYNAMIC)); -+ size = size_in_bytes / (VKD3D_VEC4_SIZE * sizeof(uint32_t)); - -- if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, cb))) -+ if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, range))) - { - /* Push constant buffers are handled in - * spirv_compiler_emit_push_constant_buffers(). - */ -- unsigned int cb_size_in_bytes = cb->size * VKD3D_VEC4_SIZE * sizeof(uint32_t); -- push_cb->reg = *reg; -- push_cb->size = cb->size; -- if (cb_size_in_bytes > push_cb->pc.size) -+ push_cb->reg = reg; -+ push_cb->size = size; -+ if (size_in_bytes > push_cb->pc.size) - { - WARN("Constant buffer size %u exceeds push constant size %u.\n", -- cb_size_in_bytes, push_cb->pc.size); -+ size_in_bytes, push_cb->pc.size); - } - return; - } - - vec4_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); -- length_id = spirv_compiler_get_constant_uint(compiler, cb->size); -+ length_id = spirv_compiler_get_constant_uint(compiler, size); - array_type_id = vkd3d_spirv_build_op_type_array(builder, vec4_id, length_id); - vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, 16); - - struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1); - vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); - vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, SpvDecorationOffset, 0); -- vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", cb->size); -+ vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", 
size); - - var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, -- reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); -+ &reg, range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); - -- vkd3d_symbol_make_register(&reg_symbol, reg); -+ vkd3d_symbol_make_register(&reg_symbol, &reg); - vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, - VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); - reg_symbol.descriptor_array = var_info.array_symbol; -@@ -5557,29 +5652,34 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi - spirv_compiler_put_symbol(compiler, &reg_symbol); - } - --static void spirv_compiler_emit_dcl_sampler(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_register_range *range, unsigned int register_id) - { -- const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; - const SpvStorageClass storage_class = SpvStorageClassUniformConstant; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_register *reg = &sampler->src.reg; - struct vkd3d_descriptor_variable_info var_info; - struct vkd3d_symbol reg_symbol; - uint32_t type_id, var_id; - -- vkd3d_symbol_make_sampler(&reg_symbol, reg); -- reg_symbol.info.sampler.range = sampler->range; -+ const struct vkd3d_shader_register reg = -+ { -+ .type = VKD3DSPR_SAMPLER, -+ .idx[0].offset = register_id, -+ .idx_count = 1, -+ }; -+ -+ vkd3d_symbol_make_sampler(&reg_symbol, &reg); -+ reg_symbol.info.sampler.range = *range; - spirv_compiler_put_symbol(compiler, &reg_symbol); - -- if (spirv_compiler_has_combined_sampler(compiler, NULL, sampler)) -+ if (spirv_compiler_has_combined_sampler_for_sampler(compiler, range)) - return; - - type_id = vkd3d_spirv_get_op_type_sampler(builder); -- var_id = 
spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, -- &sampler->range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); -+ var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, &reg, -+ range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); - -- vkd3d_symbol_make_register(&reg_symbol, reg); -+ vkd3d_symbol_make_register(&reg_symbol, &reg); - vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, - VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); - reg_symbol.descriptor_array = var_info.array_symbol; -@@ -5624,13 +5724,13 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty - } - } - --static const struct vkd3d_shader_descriptor_info *spirv_compiler_get_descriptor_info( -+static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( - struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, - const struct vkd3d_shader_register_range *range) - { -- const struct vkd3d_shader_scan_descriptor_info *descriptor_info = compiler->scan_descriptor_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; - unsigned int register_last = (range->last == ~0u) ? 
range->first : range->last; -- const struct vkd3d_shader_descriptor_info *d; -+ const struct vkd3d_shader_descriptor_info1 *d; - unsigned int i; - - for (i = 0; i < descriptor_info->descriptor_count; ++i) -@@ -5650,7 +5750,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler - bool raw_structured, uint32_t depth) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_descriptor_info *d; -+ const struct vkd3d_shader_descriptor_info1 *d; - bool uav_read, uav_atomics; - uint32_t sampled_type_id; - SpvImageFormat format; -@@ -5685,7 +5785,7 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi - const struct vkd3d_shader_combined_resource_sampler *current; - uint32_t image_type_id, type_id, ptr_type_id, var_id; - enum vkd3d_shader_binding_flag resource_type_flag; -- const struct vkd3d_shader_descriptor_info *d; -+ const struct vkd3d_shader_descriptor_info1 *d; - struct vkd3d_symbol symbol; - unsigned int i; - bool depth; -@@ -5761,20 +5861,30 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi - } - - static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, -- const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, -- enum vkd3d_data_type resource_data_type, unsigned int structure_stride, bool raw) -+ const struct vkd3d_shader_register_range *range, unsigned int register_id, -+ unsigned int sample_count, bool is_uav, enum vkd3d_shader_resource_type resource_type, -+ enum vkd3d_shader_resource_data_type resource_data_type, unsigned int structure_stride, bool raw) - { - struct vkd3d_descriptor_variable_info var_info, counter_var_info = {0}; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - SpvStorageClass storage_class = SpvStorageClassUniformConstant; - uint32_t counter_type_id, type_id, var_id, counter_var_id = 0; -- const struct 
vkd3d_shader_register *reg = &resource->reg.reg; - const struct vkd3d_spirv_resource_type *resource_type_info; - enum vkd3d_shader_component_type sampled_type; - struct vkd3d_symbol resource_symbol; -- bool is_uav; - -- is_uav = reg->type == VKD3DSPR_UAV; -+ struct vkd3d_shader_register reg = -+ { -+ .type = is_uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, -+ .idx[0].offset = register_id, -+ .idx_count = 1, -+ }; -+ -+ if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && sample_count == 1) -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; -+ - if (!(resource_type_info = spirv_compiler_enable_resource_type(compiler, - resource_type, is_uav))) - { -@@ -5782,11 +5892,11 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - return; - } - -- sampled_type = vkd3d_component_type_from_data_type(resource_data_type); -+ sampled_type = vkd3d_component_type_from_resource_data_type(resource_data_type); - -- if (spirv_compiler_has_combined_sampler(compiler, resource, NULL)) -+ if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) - { -- spirv_compiler_emit_combined_sampler_declarations(compiler, reg, &resource->range, -+ spirv_compiler_emit_combined_sampler_declarations(compiler, &reg, range, - resource_type, sampled_type, structure_stride, raw, resource_type_info); - return; - } -@@ -5809,19 +5919,18 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - } - else - { -- type_id = spirv_compiler_get_image_type_id(compiler, reg, &resource->range, -+ type_id = spirv_compiler_get_image_type_id(compiler, &reg, range, - resource_type_info, sampled_type, structure_stride || raw, 0); - } - -- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, -- &resource->range, resource_type, false, &var_info); -+ var_id = 
spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, &reg, -+ range, resource_type, false, &var_info); - - if (is_uav) - { -- const struct vkd3d_shader_descriptor_info *d; -+ const struct vkd3d_shader_descriptor_info1 *d; - -- d = spirv_compiler_get_descriptor_info(compiler, -- VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &resource->range); -+ d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); - - if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) - vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); -@@ -5853,15 +5962,15 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - type_id = struct_id; - } - -- counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, -- &resource->range, resource_type, true, &counter_var_info); -+ counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, -+ type_id, &reg, range, resource_type, true, &counter_var_info); - } - } - -- vkd3d_symbol_make_resource(&resource_symbol, reg); -+ vkd3d_symbol_make_resource(&resource_symbol, &reg); - resource_symbol.id = var_id; - resource_symbol.descriptor_array = var_info.array_symbol; -- resource_symbol.info.resource.range = resource->range; -+ resource_symbol.info.resource.range = *range; - resource_symbol.info.resource.sampled_type = sampled_type; - resource_symbol.info.resource.type_id = type_id; - resource_symbol.info.resource.resource_type_info = resource_type_info; -@@ -5874,58 +5983,6 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - spirv_compiler_put_symbol(compiler, &resource_symbol); - } - --static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) --{ -- const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; -- enum vkd3d_shader_resource_type resource_type = 
semantic->resource_type; -- uint32_t flags = instruction->flags; -- -- /* We don't distinguish between APPEND and COUNTER UAVs. */ -- flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; -- if (flags) -- FIXME("Unhandled UAV flags %#x.\n", flags); -- -- if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && semantic->sample_count == 1) -- resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -- else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && semantic->sample_count == 1) -- resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; -- -- spirv_compiler_emit_resource_declaration(compiler, &semantic->resource, -- resource_type, semantic->resource_data_type[0], 0, false); --} -- --static void spirv_compiler_emit_dcl_resource_raw(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) --{ -- const struct vkd3d_shader_raw_resource *resource = &instruction->declaration.raw_resource; -- uint32_t flags = instruction->flags; -- -- /* We don't distinguish between APPEND and COUNTER UAVs. */ -- flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; -- if (flags) -- FIXME("Unhandled UAV flags %#x.\n", flags); -- -- spirv_compiler_emit_resource_declaration(compiler, &resource->resource, -- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, 0, true); --} -- --static void spirv_compiler_emit_dcl_resource_structured(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) --{ -- const struct vkd3d_shader_structured_resource *resource = &instruction->declaration.structured_resource; -- unsigned int stride = resource->byte_stride; -- uint32_t flags = instruction->flags; -- -- /* We don't distinguish between APPEND and COUNTER UAVs. 
*/ -- flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; -- if (flags) -- FIXME("Unhandled UAV flags %#x.\n", flags); -- -- spirv_compiler_emit_resource_declaration(compiler, &resource->resource, -- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, stride / 4, false); --} -- - static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) - { -@@ -6236,9 +6293,6 @@ static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) - - vkd3d_spirv_build_op_function_end(builder); - -- compiler->temp_id = 0; -- compiler->temp_count = 0; -- - if (is_in_control_point_phase(compiler)) - { - if (compiler->epilogue_function_id) -@@ -6640,7 +6694,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, - uint32_t components[VKD3D_VEC4_SIZE]; - unsigned int i, component_count; - -- if (src->reg.type == VKD3DSPR_IMMCONST || src->reg.type == VKD3DSPR_IMMCONST64 || dst->modifiers || src->modifiers) -+ if (register_is_constant_or_undef(&src->reg) || dst->modifiers || src->modifiers) - goto general_implementation; - - spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); -@@ -7398,7 +7452,13 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c - assert(compiler->control_flow_depth); - assert(cf_info->current_block == VKD3D_BLOCK_SWITCH); - -- assert(src->swizzle == VKD3D_SHADER_NO_SWIZZLE && src->reg.type == VKD3DSPR_IMMCONST); -+ if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) -+ { -+ WARN("Unexpected src swizzle %#x.\n", src->swizzle); -+ spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE, -+ "The swizzle for a switch case value is not scalar."); -+ } -+ assert(src->reg.type == VKD3DSPR_IMMCONST); - value = *src->reg.u.immconst_uint; - - if (!vkd3d_array_reserve((void **)&cf_info->u.switch_.case_blocks, &cf_info->u.switch_.case_blocks_size, -@@ -9103,33 +9163,12 @@ static 
int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DCL_GLOBAL_FLAGS: - spirv_compiler_emit_dcl_global_flags(compiler, instruction); - break; -- case VKD3DSIH_DCL_TEMPS: -- spirv_compiler_emit_dcl_temps(compiler, instruction); -- break; - case VKD3DSIH_DCL_INDEXABLE_TEMP: - spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); - break; -- case VKD3DSIH_DCL_CONSTANT_BUFFER: -- spirv_compiler_emit_dcl_constant_buffer(compiler, instruction); -- break; - case VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER: - spirv_compiler_emit_dcl_immediate_constant_buffer(compiler, instruction); - break; -- case VKD3DSIH_DCL_SAMPLER: -- spirv_compiler_emit_dcl_sampler(compiler, instruction); -- break; -- case VKD3DSIH_DCL: -- case VKD3DSIH_DCL_UAV_TYPED: -- spirv_compiler_emit_dcl_resource(compiler, instruction); -- break; -- case VKD3DSIH_DCL_RESOURCE_RAW: -- case VKD3DSIH_DCL_UAV_RAW: -- spirv_compiler_emit_dcl_resource_raw(compiler, instruction); -- break; -- case VKD3DSIH_DCL_RESOURCE_STRUCTURED: -- case VKD3DSIH_DCL_UAV_STRUCTURED: -- spirv_compiler_emit_dcl_resource_structured(compiler, instruction); -- break; - case VKD3DSIH_DCL_TGSM_RAW: - spirv_compiler_emit_dcl_tgsm_raw(compiler, instruction); - break; -@@ -9425,7 +9464,16 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_CUT_STREAM: - spirv_compiler_emit_cut_stream(compiler, instruction); - break; -+ case VKD3DSIH_DCL: -+ case VKD3DSIH_DCL_CONSTANT_BUFFER: - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: -+ case VKD3DSIH_DCL_RESOURCE_RAW: -+ case VKD3DSIH_DCL_RESOURCE_STRUCTURED: -+ case VKD3DSIH_DCL_SAMPLER: -+ case VKD3DSIH_DCL_TEMPS: -+ case VKD3DSIH_DCL_UAV_RAW: -+ case VKD3DSIH_DCL_UAV_STRUCTURED: -+ case VKD3DSIH_DCL_UAV_TYPED: - case VKD3DSIH_HS_DECLS: - case VKD3DSIH_NOP: - /* nothing to do */ -@@ -9437,6 +9485,50 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - return ret; - } - -+static void 
spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < compiler->scan_descriptor_info->descriptor_count; ++i) -+ { -+ const struct vkd3d_shader_descriptor_info1 *descriptor = &compiler->scan_descriptor_info->descriptors[i]; -+ struct vkd3d_shader_register_range range; -+ -+ range.first = descriptor->register_index; -+ if (descriptor->count == ~0u) -+ range.last = ~0u; -+ else -+ range.last = descriptor->register_index + descriptor->count - 1; -+ range.space = descriptor->register_space; -+ -+ switch (descriptor->type) -+ { -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: -+ spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor->register_id); -+ break; -+ -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: -+ spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor->register_id, descriptor->buffer_size); -+ break; -+ -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: -+ spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, -+ descriptor->sample_count, false, descriptor->resource_type, descriptor->resource_data_type, -+ descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); -+ break; -+ -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: -+ spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, -+ descriptor->sample_count, true, descriptor->resource_type, descriptor->resource_data_type, -+ descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ } -+} -+ - static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, - struct vkd3d_shader_code *spirv) -@@ -9444,28 +9536,31 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - const struct vkd3d_shader_spirv_target_info *info = 
compiler->spirv_target_info; - const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; - struct vkd3d_shader_instruction_array instructions; - enum vkd3d_result result = VKD3D_OK; - unsigned int i; - -+ if (parser->shader_desc.temp_count) -+ spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); -+ -+ spirv_compiler_emit_descriptor_declarations(compiler); -+ - compiler->location.column = 0; - compiler->location.line = 1; - -+ if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) -+ return result; -+ - instructions = parser->instructions; - memset(&parser->instructions, 0, sizeof(parser->instructions)); - -- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL -- && (result = instruction_array_flatten_hull_shader_phases(&instructions)) >= 0) -- { -- result = instruction_array_normalise_hull_shader_control_point_io(&instructions, -- &compiler->input_signature); -- } -- if (result >= 0) -- result = instruction_array_normalise_io_registers(&instructions, parser->shader_version.type, -- &compiler->input_signature, &compiler->output_signature, &compiler->patch_constant_signature); -- -- if (result >= 0 && TRACE_ON()) -- vkd3d_shader_trace(&instructions, &parser->shader_version); -+ compiler->input_signature = shader_desc->input_signature; -+ compiler->output_signature = shader_desc->output_signature; -+ compiler->patch_constant_signature = shader_desc->patch_constant_signature; -+ memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); -+ memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); -+ memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); - - if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) - spirv_compiler_emit_shader_signature_outputs(compiler); -@@ -9541,7 +9636,7 @@ static int 
spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - } - - int spirv_compile(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, -+ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index d066b13ee4e..045fb6c5f64 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -430,6 +430,8 @@ enum vkd3d_sm4_register_type - VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, - VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, - VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, -+ -+ VKD3D_SM4_REGISTER_TYPE_COUNT, - }; - - enum vkd3d_sm4_extended_operand_type -@@ -505,7 +507,7 @@ enum vkd3d_sm4_input_primitive_type - - enum vkd3d_sm4_swizzle_type - { -- VKD3D_SM4_SWIZZLE_NONE = 0x0, -+ VKD3D_SM4_SWIZZLE_NONE = 0x0, /* swizzle bitfield contains a mask */ - VKD3D_SM4_SWIZZLE_VEC4 = 0x1, - VKD3D_SM4_SWIZZLE_SCALAR = 0x2, - }; -@@ -571,6 +573,12 @@ struct sm4_index_range_array - struct sm4_index_range ranges[MAX_REG_OUTPUT * 2]; - }; - -+struct vkd3d_sm4_lookup_tables -+{ -+ const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; -+ const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; -+}; -+ - struct vkd3d_shader_sm4_parser - { - const uint32_t *start, *end, *ptr; -@@ -587,6 +595,8 @@ struct vkd3d_shader_sm4_parser - struct sm4_index_range_array output_index_ranges; - struct sm4_index_range_array patch_constant_index_ranges; - -+ struct vkd3d_sm4_lookup_tables lookup; -+ - struct vkd3d_shader_parser p; - }; - -@@ -697,6 +707,19 @@ static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, - VKD3D_SHADER_CONDITIONAL_OP_NZ : 
VKD3D_SHADER_CONDITIONAL_OP_Z; - } - -+static void shader_sm4_read_case_condition(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, -+ (struct vkd3d_shader_src_param *)&ins->src[0]); -+ if (ins->src[0].reg.type != VKD3DSPR_IMMCONST) -+ { -+ FIXME("Switch case value is not a 32-bit constant.\n"); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE, -+ "Switch case value is not a 32-bit immediate constant register."); -+ } -+} -+ - static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) - { -@@ -989,6 +1012,8 @@ static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *i - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) - { - ins->declaration.count = *tokens; -+ if (opcode == VKD3D_SM4_OP_DCL_TEMPS) -+ priv->p.shader_desc.temp_count = max(priv->p.shader_desc.temp_count, *tokens); - } - - static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1203,7 +1228,8 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] = - {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, - {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", - shader_sm4_read_conditional_op}, -- {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, -+ {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u", -+ shader_sm4_read_case_condition}, - {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, - {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", - shader_sm4_read_conditional_op}, -@@ -1466,50 +1492,10 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] = - {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, 
VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, - }; - --static const enum vkd3d_shader_register_type register_type_table[] = --{ -- /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, -- /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, -- /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, -- /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, -- /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, -- /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, -- /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, -- /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, -- /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, -- /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, -- /* UNKNOWN */ ~0u, -- /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, -- /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, -- /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, -- /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, -- /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, -- /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, -- /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, -- /* UNKNOWN */ ~0u, -- /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, -- /* UNKNOWN */ ~0u, -- /* UNKNOWN */ ~0u, -- /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, -- /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, -- /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, -- /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, -- /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, -- /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, -- /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, -- /* UNKNOWN */ ~0u, -- /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, -- /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, -- /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, -- /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, -- /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, -- /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, -- /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, -- 
/* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, -- /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, -- /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, -- /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, -- /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, -+struct vkd3d_sm4_register_type_info -+{ -+ enum vkd3d_sm4_register_type sm4_type; -+ enum vkd3d_shader_register_type vkd3d_type; - }; - - static const enum vkd3d_shader_register_precision register_precision_table[] = -@@ -1522,18 +1508,104 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = - /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, - }; - -+struct tpf_writer -+{ -+ struct hlsl_ctx *ctx; -+ struct vkd3d_bytecode_buffer *buffer; -+ struct vkd3d_sm4_lookup_tables lookup; -+}; -+ - static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) - { - unsigned int i; - - for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) - { -- if (opcode == opcode_table[i].opcode) return &opcode_table[i]; -+ if (opcode == opcode_table[i].opcode) -+ return &opcode_table[i]; - } - - return NULL; - } - -+static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) -+{ -+ const struct vkd3d_sm4_register_type_info *info; -+ unsigned int i; -+ -+ static const struct vkd3d_sm4_register_type_info register_type_table[] = -+ { -+ {VKD3D_SM4_RT_TEMP, VKD3DSPR_TEMP}, -+ {VKD3D_SM4_RT_INPUT, VKD3DSPR_INPUT}, -+ {VKD3D_SM4_RT_OUTPUT, VKD3DSPR_OUTPUT}, -+ {VKD3D_SM4_RT_INDEXABLE_TEMP, VKD3DSPR_IDXTEMP}, -+ {VKD3D_SM4_RT_IMMCONST, VKD3DSPR_IMMCONST}, -+ {VKD3D_SM4_RT_IMMCONST64, VKD3DSPR_IMMCONST64}, -+ {VKD3D_SM4_RT_SAMPLER, VKD3DSPR_SAMPLER}, -+ {VKD3D_SM4_RT_RESOURCE, VKD3DSPR_RESOURCE}, -+ {VKD3D_SM4_RT_CONSTBUFFER, VKD3DSPR_CONSTBUFFER}, -+ {VKD3D_SM4_RT_IMMCONSTBUFFER, VKD3DSPR_IMMCONSTBUFFER}, -+ {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID}, -+ {VKD3D_SM4_RT_DEPTHOUT, 
VKD3DSPR_DEPTHOUT}, -+ {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL}, -+ {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER}, -+ {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK}, -+ {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM}, -+ {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY}, -+ {VKD3D_SM5_RT_FUNCTION_POINTER, VKD3DSPR_FUNCTIONPOINTER}, -+ {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID, VKD3DSPR_OUTPOINTID}, -+ {VKD3D_SM5_RT_FORK_INSTANCE_ID, VKD3DSPR_FORKINSTID}, -+ {VKD3D_SM5_RT_JOIN_INSTANCE_ID, VKD3DSPR_JOININSTID}, -+ {VKD3D_SM5_RT_INPUT_CONTROL_POINT, VKD3DSPR_INCONTROLPOINT}, -+ {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT, VKD3DSPR_OUTCONTROLPOINT}, -+ {VKD3D_SM5_RT_PATCH_CONSTANT_DATA, VKD3DSPR_PATCHCONST}, -+ {VKD3D_SM5_RT_DOMAIN_LOCATION, VKD3DSPR_TESSCOORD}, -+ {VKD3D_SM5_RT_UAV, VKD3DSPR_UAV}, -+ {VKD3D_SM5_RT_SHARED_MEMORY, VKD3DSPR_GROUPSHAREDMEM}, -+ {VKD3D_SM5_RT_THREAD_ID, VKD3DSPR_THREADID}, -+ {VKD3D_SM5_RT_THREAD_GROUP_ID, VKD3DSPR_THREADGROUPID}, -+ {VKD3D_SM5_RT_LOCAL_THREAD_ID, VKD3DSPR_LOCALTHREADID}, -+ {VKD3D_SM5_RT_COVERAGE, VKD3DSPR_COVERAGE}, -+ {VKD3D_SM5_RT_LOCAL_THREAD_INDEX, VKD3DSPR_LOCALTHREADINDEX}, -+ {VKD3D_SM5_RT_GS_INSTANCE_ID, VKD3DSPR_GSINSTID}, -+ {VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL, VKD3DSPR_DEPTHOUTGE}, -+ {VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL, VKD3DSPR_DEPTHOUTLE}, -+ {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF}, -+ }; -+ -+ memset(lookup, 0, sizeof(*lookup)); -+ -+ for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) -+ { -+ info = ®ister_type_table[i]; -+ lookup->register_type_info_from_sm4[info->sm4_type] = info; -+ lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; -+ } -+} -+ -+static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+{ -+ tpf->ctx = ctx; -+ tpf->buffer = buffer; -+ init_sm4_lookup_tables(&tpf->lookup); -+} -+ -+static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( -+ const struct vkd3d_sm4_lookup_tables *lookup, enum 
vkd3d_sm4_register_type sm4_type) -+{ -+ if (sm4_type >= VKD3D_SM4_REGISTER_TYPE_COUNT) -+ return NULL; -+ return lookup->register_type_info_from_sm4[sm4_type]; -+} -+ -+static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_type( -+ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) -+{ -+ if (vkd3d_type >= VKD3DSPR_COUNT) -+ return NULL; -+ return lookup->register_type_info_from_vkd3d[vkd3d_type]; -+} -+ - static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) - { - switch (sm4->p.shader_version.type) -@@ -1640,6 +1712,7 @@ static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_typ - static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, - enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) - { -+ const struct vkd3d_sm4_register_type_info *register_type_info; - enum vkd3d_sm4_register_precision precision; - enum vkd3d_sm4_register_type register_type; - enum vkd3d_sm4_extended_operand_type type; -@@ -1654,15 +1727,15 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui - token = *(*ptr)++; - - register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; -- if (register_type >= ARRAY_SIZE(register_type_table) -- || register_type_table[register_type] == VKD3DSPR_INVALID) -+ register_type_info = get_info_from_sm4_register_type(&priv->lookup, register_type); -+ if (!register_type_info) - { - FIXME("Unhandled register type %#x.\n", register_type); - param->type = VKD3DSPR_TEMP; - } - else - { -- param->type = register_type_table[register_type]; -+ param->type = register_type_info->vkd3d_type; - } - param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; - param->non_uniform = false; -@@ -1953,6 +2026,7 @@ static bool 
shader_sm4_validate_input_output_register(struct vkd3d_shader_sm4_pa - static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) - { -+ unsigned int dimension, mask; - DWORD token; - - if (*ptr >= end) -@@ -1968,37 +2042,63 @@ static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, cons - return false; - } - -- if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) -+ switch ((dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT)) - { -- src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -- } -- else -- { -- enum vkd3d_sm4_swizzle_type swizzle_type = -- (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -+ case VKD3D_SM4_DIMENSION_NONE: -+ case VKD3D_SM4_DIMENSION_SCALAR: -+ src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ break; - -- switch (swizzle_type) -+ case VKD3D_SM4_DIMENSION_VEC4: - { -- case VKD3D_SM4_SWIZZLE_NONE: -- if (shader_sm4_is_scalar_register(&src_param->reg)) -- src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -- else -+ enum vkd3d_sm4_swizzle_type swizzle_type = -+ (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -+ -+ switch (swizzle_type) -+ { -+ case VKD3D_SM4_SWIZZLE_NONE: - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -- break; - -- case VKD3D_SM4_SWIZZLE_SCALAR: -- src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; -- src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; -- break; -+ mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; -+ /* Mask seems only to be used for vec4 constants and is always zero. 
*/ -+ if (!register_is_constant(&src_param->reg)) -+ { -+ FIXME("Source mask %#x is not for a constant.\n", mask); -+ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, -+ "Unhandled mask %#x for a non-constant source register.", mask); -+ } -+ else if (mask) -+ { -+ FIXME("Unhandled mask %#x.\n", mask); -+ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, -+ "Unhandled source register mask %#x.", mask); -+ } - -- case VKD3D_SM4_SWIZZLE_VEC4: -- src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); -- break; -+ break; - -- default: -- FIXME("Unhandled swizzle type %#x.\n", swizzle_type); -- break; -+ case VKD3D_SM4_SWIZZLE_SCALAR: -+ src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; -+ src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; -+ break; -+ -+ case VKD3D_SM4_SWIZZLE_VEC4: -+ src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); -+ break; -+ -+ default: -+ FIXME("Unhandled swizzle type %#x.\n", swizzle_type); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE, -+ "Source register swizzle type %#x is invalid.", swizzle_type); -+ break; -+ } -+ break; - } -+ -+ default: -+ FIXME("Unhandled dimension %#x.\n", dimension); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION, -+ "Source register dimension %#x is invalid.", dimension); -+ break; - } - - if (register_is_input_output(&src_param->reg) && !shader_sm4_validate_input_output_register(priv, -@@ -2011,7 +2111,9 @@ static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, cons - static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) - { -+ enum vkd3d_sm4_swizzle_type 
swizzle_type; - enum vkd3d_shader_src_modifier modifier; -+ unsigned int dimension, swizzle; - DWORD token; - - if (*ptr >= end) -@@ -2033,10 +2135,53 @@ static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, cons - return false; - } - -- dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; -+ switch ((dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT)) -+ { -+ case VKD3D_SM4_DIMENSION_NONE: -+ dst_param->write_mask = 0; -+ break; -+ -+ case VKD3D_SM4_DIMENSION_SCALAR: -+ dst_param->write_mask = VKD3DSP_WRITEMASK_0; -+ break; -+ -+ case VKD3D_SM4_DIMENSION_VEC4: -+ swizzle_type = (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -+ switch (swizzle_type) -+ { -+ case VKD3D_SM4_SWIZZLE_NONE: -+ dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; -+ break; -+ -+ case VKD3D_SM4_SWIZZLE_VEC4: -+ swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); -+ if (swizzle != VKD3D_SHADER_NO_SWIZZLE) -+ { -+ FIXME("Unhandled swizzle %#x.\n", swizzle); -+ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_SWIZZLE, -+ "Unhandled destination register swizzle %#x.", swizzle); -+ } -+ dst_param->write_mask = VKD3DSP_WRITEMASK_ALL; -+ break; -+ -+ default: -+ FIXME("Unhandled swizzle type %#x.\n", swizzle_type); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE, -+ "Destination register swizzle type %#x is invalid.", swizzle_type); -+ break; -+ } -+ break; -+ -+ default: -+ FIXME("Unhandled dimension %#x.\n", dimension); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION, -+ "Destination register dimension %#x is invalid.", dimension); -+ break; -+ } -+ - if (data_type == VKD3D_DATA_DOUBLE) - dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); -- /* Scalar registers are declared with 
no write mask in shader bytecode. */ -+ /* Some scalar registers are declared with no write mask in shader bytecode. */ - if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) - dst_param->write_mask = VKD3DSP_WRITEMASK_0; - dst_param->modifiers = 0; -@@ -2362,6 +2507,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t - sm4->output_map[e->register_index] = e->semantic_index; - } - -+ init_sm4_lookup_tables(&sm4->lookup); -+ - return true; - } - -@@ -2442,6 +2589,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi - } - - shader_desc = &sm4->p.shader_desc; -+ shader_desc->is_dxil = false; - if ((ret = shader_extract_from_dxbc(&compile_info->source, - message_context, compile_info->source_name, shader_desc)) < 0) - { -@@ -2499,7 +2647,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi - return sm4->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; - } - --static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); -+static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); - - static bool type_is_integer(const struct hlsl_type *type) - { -@@ -2516,7 +2664,7 @@ static bool type_is_integer(const struct hlsl_type *type) - } - - bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) -+ bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) - { - unsigned int i; - -@@ -2526,24 +2674,24 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - bool output; - enum vkd3d_shader_type shader_type; - enum vkd3d_sm4_swizzle_type swizzle_type; -- enum vkd3d_sm4_register_type type; -+ enum vkd3d_shader_register_type type; - bool has_idx; - } - 
register_table[] = - { -- {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, -- {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, -- {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, -+ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADID, false}, -+ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADGROUPID, false}, -+ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_LOCALTHREADID, false}, - -- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3DSPR_PRIMID, false}, - - /* Put sv_target in this table, instead of letting it fall through to - * default varying allocation, so that the register index matches the - * usage index. 
*/ -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, -- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) -@@ -2552,7 +2700,8 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - && output == register_table[i].output - && ctx->profile->type == register_table[i].shader_type) - { -- *type = register_table[i].type; -+ if (type) -+ *type = register_table[i].type; - if (swizzle_type) - *swizzle_type = register_table[i].swizzle_type; - *has_idx = register_table[i].has_idx; -@@ -2624,7 +2773,8 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant - return true; - } - --static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) -+static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, -+ uint32_t tag, struct vkd3d_bytecode_buffer *buffer) - { - /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN - * sections to be aligned. 
Without this, the sections themselves will be -@@ -2632,6 +2782,9 @@ static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_byt - size_t size = bytecode_align(buffer); - - dxbc_writer_add_section(dxbc, tag, buffer->data, size); -+ -+ if (buffer->status < 0) -+ ctx->result = buffer->status; - } - - static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) -@@ -2649,7 +2802,6 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; -- enum vkd3d_sm4_register_type type; - uint32_t usage_idx, reg_idx; - D3D_NAME usage; - bool has_idx; -@@ -2663,14 +2815,13 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - continue; - usage_idx = var->semantic.index; - -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) -+ if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, NULL, &has_idx)) - { - reg_idx = has_idx ? var->semantic.index : ~0u; - } - else - { - assert(var->regs[HLSL_REGSET_NUMERIC].allocated); -- type = VKD3D_SM4_RT_INPUT; - reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; - } - -@@ -2739,7 +2890,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - - set_u32(&buffer, count_position, i); - -- add_section(dxbc, output ? TAG_OSGN : TAG_ISGN, &buffer); -+ add_section(ctx, dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); - } - - static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) -@@ -2827,6 +2978,22 @@ static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) - return D3D_SVT_VERTEXSHADER; - case HLSL_TYPE_VOID: - return D3D_SVT_VOID; -+ case HLSL_TYPE_UAV: -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return D3D_SVT_RWTEXTURE1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return D3D_SVT_RWTEXTURE2D; -+ case HLSL_SAMPLER_DIM_3D: -+ return D3D_SVT_RWTEXTURE3D; -+ case HLSL_SAMPLER_DIM_1DARRAY: -+ return D3D_SVT_RWTEXTURE1DARRAY; -+ case HLSL_SAMPLER_DIM_2DARRAY: -+ return D3D_SVT_RWTEXTURE2DARRAY; -+ default: -+ vkd3d_unreachable(); -+ } - default: - vkd3d_unreachable(); - } -@@ -2967,47 +3134,154 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ - } - } - -+struct extern_resource -+{ -+ /* var is only not NULL if this resource is a whole variable, so it may be responsible for more -+ * than one component. 
*/ -+ const struct hlsl_ir_var *var; -+ -+ char *name; -+ struct hlsl_type *data_type; -+ bool is_user_packed; -+ -+ enum hlsl_regset regset; -+ unsigned int id, bind_count; -+}; -+ - static int sm4_compare_extern_resources(const void *a, const void *b) - { -- const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; -- const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; -- enum hlsl_regset aa_regset, bb_regset; -+ const struct extern_resource *aa = (const struct extern_resource *)a; -+ const struct extern_resource *bb = (const struct extern_resource *)b; -+ int r; - -- aa_regset = hlsl_type_get_regset(aa->data_type); -- bb_regset = hlsl_type_get_regset(bb->data_type); -+ if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) -+ return r; - -- if (aa_regset != bb_regset) -- return aa_regset - bb_regset; -+ return vkd3d_u32_compare(aa->id, bb->id); -+} - -- return aa->regs[aa_regset].id - bb->regs[bb_regset].id; -+static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < count; ++i) -+ vkd3d_free(extern_resources[i].name); -+ vkd3d_free(extern_resources); - } - --static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) -+static const char *string_skip_tag(const char *string) - { -- const struct hlsl_ir_var **extern_resources = NULL; -+ if (!strncmp(string, "", strlen(""))) -+ return string + strlen(""); -+ return string; -+} -+ -+static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) -+{ -+ bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; -+ struct extern_resource *extern_resources = NULL; - const struct hlsl_ir_var *var; - enum hlsl_regset regset; - size_t capacity = 0; -+ char *name; - - *count = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if 
(!hlsl_type_is_resource(var->data_type)) -- continue; -- regset = hlsl_type_get_regset(var->data_type); -- if (!var->regs[regset].allocated) -- continue; -- -- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -- sizeof(*extern_resources)))) -+ if (separate_components) - { -- *count = 0; -- return NULL; -+ unsigned int component_count = hlsl_type_component_count(var->data_type); -+ unsigned int k, regset_offset; -+ -+ for (k = 0; k < component_count; ++k) -+ { -+ struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); -+ struct vkd3d_string_buffer *name_buffer; -+ -+ if (!hlsl_type_is_resource(component_type)) -+ continue; -+ -+ regset = hlsl_type_get_regset(component_type); -+ regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, regset, k); -+ -+ if (regset_offset > var->regs[regset].allocation_size) -+ continue; -+ -+ if (var->objects_usage[regset][regset_offset].used) -+ { -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -+ sizeof(*extern_resources)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ hlsl_release_string_buffer(ctx, name_buffer); -+ return NULL; -+ } -+ hlsl_release_string_buffer(ctx, name_buffer); -+ -+ extern_resources[*count].var = NULL; -+ -+ extern_resources[*count].name = name; -+ extern_resources[*count].data_type = component_type; -+ extern_resources[*count].is_user_packed = false; -+ -+ extern_resources[*count].regset = regset; -+ extern_resources[*count].id = var->regs[regset].id + regset_offset; -+ extern_resources[*count].bind_count = 1; -+ -+ ++*count; -+ } -+ } - } 
-+ else -+ { -+ if (!hlsl_type_is_resource(var->data_type)) -+ continue; -+ regset = hlsl_type_get_regset(var->data_type); -+ if (!var->regs[regset].allocated) -+ continue; -+ -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -+ sizeof(*extern_resources)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ extern_resources[*count].var = var; - -- extern_resources[*count] = var; -- ++*count; -+ extern_resources[*count].name = name; -+ extern_resources[*count].data_type = var->data_type; -+ extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; -+ -+ extern_resources[*count].regset = regset; -+ extern_resources[*count].id = var->regs[regset].id; -+ extern_resources[*count].bind_count = var->bind_count[regset]; -+ -+ ++*count; -+ } - } - - qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); -@@ -3020,8 +3294,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - size_t cbuffers_offset, resources_offset, creator_offset, string_offset; - size_t cbuffer_position, resource_position, creator_position; - const struct hlsl_profile_info *profile = ctx->profile; -- const struct hlsl_ir_var **extern_resources; - struct vkd3d_bytecode_buffer buffer = {0}; -+ struct extern_resource *extern_resources; - const struct hlsl_buffer *cbuffer; - const struct hlsl_ir_var *var; - -@@ -3075,18 +3349,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - for (i = 0; i < extern_resources_count; ++i) - { -- enum hlsl_regset regset; -+ const struct extern_resource *resource = &extern_resources[i]; - uint32_t flags = 0; - -- var = extern_resources[i]; -- regset = hlsl_type_get_regset(var->data_type); -- -- if 
(var->reg_reservation.reg_type) -+ if (resource->is_user_packed) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, sm4_resource_type(var->data_type)); -- if (regset == HLSL_REGSET_SAMPLERS) -+ put_u32(&buffer, sm4_resource_type(resource->data_type)); -+ if (resource->regset == HLSL_REGSET_SAMPLERS) - { - put_u32(&buffer, 0); - put_u32(&buffer, 0); -@@ -3094,15 +3365,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - } - else - { -- unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; -+ unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource_format->dimx; - -- put_u32(&buffer, sm4_resource_format(var->data_type)); -- put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); -+ put_u32(&buffer, sm4_resource_format(resource->data_type)); -+ put_u32(&buffer, sm4_rdef_resource_dimension(resource->data_type)); - put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; - } -- put_u32(&buffer, var->regs[regset].id); -- put_u32(&buffer, var->regs[regset].bind_count); -+ put_u32(&buffer, resource->id); -+ put_u32(&buffer, resource->bind_count); - put_u32(&buffer, flags); - } - -@@ -3128,9 +3399,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - for (i = 0; i < extern_resources_count; ++i) - { -- var = extern_resources[i]; -+ const struct extern_resource *resource = &extern_resources[i]; - -- string_offset = put_string(&buffer, var->name); -+ string_offset = put_string(&buffer, resource->name); - set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); - } - -@@ -3234,9 +3505,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(&buffer, creator_position, creator_offset); - -- add_section(dxbc, 
TAG_RDEF, &buffer); -+ add_section(ctx, dxbc, TAG_RDEF, &buffer); - -- vkd3d_free(extern_resources); -+ sm4_free_extern_resources(extern_resources, extern_resources_count); - } - - static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) -@@ -3308,8 +3579,8 @@ static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_mod - - struct sm4_register - { -- enum vkd3d_sm4_register_type type; -- uint32_t idx[2]; -+ enum vkd3d_shader_register_type type; -+ struct vkd3d_shader_register_index idx[2]; - unsigned int idx_count; - enum vkd3d_sm4_dimension dim; - uint32_t immconst_uint[4]; -@@ -3346,8 +3617,9 @@ struct sm4_instruction - - static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, - unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, -- const struct hlsl_deref *deref, const struct hlsl_type *data_type) -+ const struct hlsl_deref *deref) - { -+ const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); - const struct hlsl_ir_var *var = deref->var; - - if (var->is_uniform) -@@ -3356,37 +3628,37 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - - if (regset == HLSL_REGSET_TEXTURES) - { -- reg->type = VKD3D_SM4_RT_RESOURCE; -+ reg->type = VKD3DSPR_RESOURCE; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; -- reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); -- assert(deref->offset_regset == HLSL_REGSET_TEXTURES); -+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ assert(regset == HLSL_REGSET_TEXTURES); - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_UAVS) - { -- reg->type = VKD3D_SM5_RT_UAV; -+ reg->type = VKD3DSPR_UAV; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = 
VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; -- reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); -- assert(deref->offset_regset == HLSL_REGSET_UAVS); -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ assert(regset == HLSL_REGSET_UAVS); - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_SAMPLERS) - { -- reg->type = VKD3D_SM4_RT_SAMPLER; -+ reg->type = VKD3DSPR_SAMPLER; - reg->dim = VKD3D_SM4_DIMENSION_NONE; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -- reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; -- reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); -- assert(deref->offset_regset == HLSL_REGSET_SAMPLERS); -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ assert(regset == HLSL_REGSET_SAMPLERS); - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } -@@ -3395,12 +3667,12 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; - - assert(data_type->class <= HLSL_CLASS_VECTOR); -- reg->type = VKD3D_SM4_RT_CONSTBUFFER; -+ reg->type = VKD3DSPR_CONSTBUFFER; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = var->buffer->reg.id; -- reg->idx[1] = offset / 4; -+ reg->idx[0].offset = var->buffer->reg.id; -+ reg->idx[1].offset = offset / 4; - reg->idx_count = 2; - *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); - } -@@ -3415,7 +3687,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - - if (has_idx) - { -- reg->idx[0] = var->semantic.index + offset / 4; -+ reg->idx[0].offset = var->semantic.index + offset / 4; - reg->idx_count = 1; - } - -@@ -3427,11 +3699,11 @@ static void 
sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - assert(hlsl_reg.allocated); -- reg->type = VKD3D_SM4_RT_INPUT; -+ reg->type = VKD3DSPR_INPUT; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = hlsl_reg.id; -+ reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } -@@ -3446,11 +3718,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - - if (has_idx) - { -- reg->idx[0] = var->semantic.index + offset / 4; -+ reg->idx[0].offset = var->semantic.index + offset / 4; - reg->idx_count = 1; - } - -- if (reg->type == VKD3D_SM4_RT_DEPTHOUT) -+ if (reg->type == VKD3DSPR_DEPTHOUT) - reg->dim = VKD3D_SM4_DIMENSION_SCALAR; - else - reg->dim = VKD3D_SM4_DIMENSION_VEC4; -@@ -3461,9 +3733,9 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - assert(hlsl_reg.allocated); -- reg->type = VKD3D_SM4_RT_OUTPUT; -+ reg->type = VKD3DSPR_OUTPUT; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; -- reg->idx[0] = hlsl_reg.id; -+ reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } -@@ -3473,22 +3745,22 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - assert(hlsl_reg.allocated); -- reg->type = VKD3D_SM4_RT_TEMP; -+ reg->type = VKD3DSPR_TEMP; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = hlsl_reg.id; -+ reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } - } - - static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, -- const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int 
map_writemask) -+ const struct hlsl_deref *deref, unsigned int map_writemask) - { - unsigned int writemask; - -- sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); -+ sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref); - if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) - src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); - } -@@ -3497,10 +3769,10 @@ static void sm4_register_from_node(struct sm4_register *reg, unsigned int *write - enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) - { - assert(instr->reg.allocated); -- reg->type = VKD3D_SM4_RT_TEMP; -+ reg->type = VKD3DSPR_TEMP; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = instr->reg.id; -+ reg->idx[0].offset = instr->reg.id; - reg->idx_count = 1; - *writemask = instr->reg.writemask; - } -@@ -3516,7 +3788,7 @@ static void sm4_src_from_constant_value(struct sm4_src_register *src, - const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) - { - src->swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -- src->reg.type = VKD3D_SM4_RT_IMMCONST; -+ src->reg.type = VKD3DSPR_IMMCONST; - if (width == 1) - { - src->reg.dim = VKD3D_SM4_DIMENSION_SCALAR; -@@ -3529,8 +3801,10 @@ static void sm4_src_from_constant_value(struct sm4_src_register *src, - src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; - for (i = 0; i < 4; ++i) - { -- if (map_writemask & (1u << i)) -+ if ((map_writemask & (1u << i)) && (j < width)) - src->reg.immconst_uint[i] = value->u[j++].u; -+ else -+ src->reg.immconst_uint[i] = 0; - } - } - } -@@ -3553,17 +3827,100 @@ static void sm4_src_from_node(struct sm4_src_register *src, - src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); - } - --static uint32_t sm4_encode_register(const struct sm4_register *reg) -+static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct 
sm4_dst_register *dst) - { -- return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) -- | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) -- | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); -+ const struct vkd3d_sm4_register_type_info *register_type_info; -+ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; -+ uint32_t sm4_reg_type, reg_dim; -+ uint32_t token = 0; -+ unsigned int j; -+ -+ register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, dst->reg.type); -+ if (!register_type_info) -+ { -+ FIXME("Unhandled vkd3d-shader register type %#x.\n", dst->reg.type); -+ sm4_reg_type = VKD3D_SM4_RT_TEMP; -+ } -+ else -+ { -+ sm4_reg_type = register_type_info->sm4_type; -+ } -+ -+ reg_dim = dst->reg.dim; -+ -+ token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; -+ token |= dst->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; -+ token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; -+ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) -+ token |= dst->writemask << VKD3D_SM4_WRITEMASK_SHIFT; -+ put_u32(buffer, token); -+ -+ for (j = 0; j < dst->reg.idx_count; ++j) -+ { -+ put_u32(buffer, dst->reg.idx[j].offset); -+ assert(!dst->reg.idx[j].rel_addr); -+ } -+} -+ -+static void sm4_write_src_register(const struct tpf_writer *tpf, const struct sm4_src_register *src) -+{ -+ const struct vkd3d_sm4_register_type_info *register_type_info; -+ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; -+ uint32_t sm4_reg_type, reg_dim; -+ uint32_t token = 0; -+ unsigned int j; -+ -+ register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, src->reg.type); -+ if (!register_type_info) -+ { -+ FIXME("Unhandled vkd3d-shader register type %#x.\n", src->reg.type); -+ sm4_reg_type = VKD3D_SM4_RT_TEMP; -+ } -+ else -+ { -+ sm4_reg_type = register_type_info->sm4_type; -+ } -+ -+ reg_dim = src->reg.dim; -+ -+ token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; -+ token |= src->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; -+ token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; -+ if (reg_dim 
== VKD3D_SM4_DIMENSION_VEC4) -+ { -+ token |= (uint32_t)src->swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -+ token |= src->swizzle << VKD3D_SM4_SWIZZLE_SHIFT; -+ } -+ if (src->reg.mod) -+ token |= VKD3D_SM4_EXTENDED_OPERAND; -+ put_u32(buffer, token); -+ -+ if (src->reg.mod) -+ put_u32(buffer, (src->reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) -+ | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); -+ -+ for (j = 0; j < src->reg.idx_count; ++j) -+ { -+ put_u32(buffer, src->reg.idx[j].offset); -+ assert(!src->reg.idx[j].rel_addr); -+ } -+ -+ if (src->reg.type == VKD3DSPR_IMMCONST) -+ { -+ put_u32(buffer, src->reg.immconst_uint[0]); -+ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) -+ { -+ put_u32(buffer, src->reg.immconst_uint[1]); -+ put_u32(buffer, src->reg.immconst_uint[2]); -+ put_u32(buffer, src->reg.immconst_uint[3]); -+ } -+ } - } - - static uint32_t sm4_register_order(const struct sm4_register *reg) - { - uint32_t order = 1; -- if (reg->type == VKD3D_SM4_RT_IMMCONST) -+ if (reg->type == VKD3DSPR_IMMCONST) - order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 
4 : 1; - order += reg->idx_count; - if (reg->mod) -@@ -3571,8 +3928,9 @@ static uint32_t sm4_register_order(const struct sm4_register *reg) - return order; - } - --static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) -+static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) - { -+ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - uint32_t token = instr->opcode; - unsigned int size = 1, i, j; - -@@ -3600,43 +3958,10 @@ static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const st - } - - for (i = 0; i < instr->dst_count; ++i) -- { -- token = sm4_encode_register(&instr->dsts[i].reg); -- if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) -- token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; -- put_u32(buffer, token); -- -- for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) -- put_u32(buffer, instr->dsts[i].reg.idx[j]); -- } -+ sm4_write_dst_register(tpf, &instr->dsts[i]); - - for (i = 0; i < instr->src_count; ++i) -- { -- token = sm4_encode_register(&instr->srcs[i].reg); -- token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -- token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; -- if (instr->srcs[i].reg.mod) -- token |= VKD3D_SM4_EXTENDED_OPERAND; -- put_u32(buffer, token); -- -- if (instr->srcs[i].reg.mod) -- put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) -- | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); -- -- for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) -- put_u32(buffer, instr->srcs[i].reg.idx[j]); -- -- if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) -- { -- put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); -- if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) -- { -- put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); -- put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); -- put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); -- } -- } 
-- } -+ sm4_write_src_register(tpf, &instr->srcs[i]); - - if (instr->byte_stride) - put_u32(buffer, instr->byte_stride); -@@ -3672,67 +3997,75 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - return true; - } - --static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) -+static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) - { - const struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, - - .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, -- .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, -- .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, -+ .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, -+ .srcs[0].reg.idx[0].offset = cbuffer->reg.id, -+ .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, - .srcs[0].reg.idx_count = 2, - .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, - .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), - .src_count = 1, - }; -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -+static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) - { -- unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; -+ struct hlsl_type *component_type; -+ unsigned int i; - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, - -- .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, -+ .dsts[0].reg.type = VKD3DSPR_SAMPLER, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - }; - -- if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) -+ component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); -+ -+ if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) - instr.opcode |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; - -- for 
(i = 0; i < count; ++i) -+ assert(resource->regset == HLSL_REGSET_SAMPLERS); -+ -+ for (i = 0; i < resource->bind_count; ++i) - { -- if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) -+ if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) - continue; - -- instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id + i; -- write_sm4_instruction(buffer, &instr); -+ instr.dsts[0].reg.idx[0].offset = resource->id + i; -+ write_sm4_instruction(tpf, &instr); - } - } - --static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_var *var, bool uav) -+static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, -+ bool uav) - { - enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; -- unsigned int i, count = var->data_type->reg_size[regset]; - struct hlsl_type *component_type; - struct sm4_instruction instr; -+ unsigned int i; - -- component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); -+ assert(resource->regset == regset); - -- for (i = 0; i < count; ++i) -+ component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); -+ -+ for (i = 0; i < resource->bind_count; ++i) - { -- if (!var->objects_usage[regset][i].used) -+ if (resource->var && !resource->var->objects_usage[regset][i].used) - continue; - - instr = (struct sm4_instruction) - { -- .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, -- .dsts[0].reg.idx = {var->regs[regset].id + i}, -+ .dsts[0].reg.type = uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, -+ .dsts[0].reg.idx[0].offset = resource->id + i, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - -@@ -3742,11 +4075,11 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - - if (uav) - { -- switch (var->data_type->sampler_dim) -+ switch (resource->data_type->sampler_dim) - { - case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; -- instr.byte_stride = var->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; -+ instr.byte_stride = resource->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; - break; - default: - instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; -@@ -3765,13 +4098,13 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - } - --static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -+static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) - { -- const struct hlsl_profile_info *profile = ctx->profile; -+ const struct hlsl_profile_info *profile = tpf->ctx->profile; - const bool output = var->is_output_semantic; - D3D_NAME usage; - bool has_idx; -@@ -3782,11 +4115,11 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - .dst_count = 1, - }; - -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) -+ if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) - { - if (has_idx) - { -- instr.dsts[0].reg.idx[0] = var->semantic.index; -+ instr.dsts[0].reg.idx[0].offset = var->semantic.index; - instr.dsts[0].reg.idx_count = 1; - } - else -@@ -3797,16 +4130,16 @@ static void 
write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - } - else - { -- instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; -- instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; -+ instr.dsts[0].reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; -+ instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; - instr.dsts[0].reg.idx_count = 1; - instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; - } - -- if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) -+ if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) - instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; - -- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -+ hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); - if (usage == ~0u) - usage = D3D_NAME_UNDEFINED; - -@@ -3866,10 +4199,10 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - break; - } - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) -+static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) - { - struct sm4_instruction instr = - { -@@ -3879,33 +4212,35 @@ static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t t - .idx_count = 1, - }; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) -+static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) - { - struct sm4_instruction instr = - { - .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, - -- .idx = {thread_count[0], thread_count[1], thread_count[2]}, -+ .idx[0] = thread_count[0], -+ .idx[1] = thread_count[1], -+ .idx[2] = thread_count[2], - .idx_count = 3, - }; - -- write_sm4_instruction(buffer, 
&instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) -+static void write_sm4_ret(const struct tpf_writer *tpf) - { - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_RET, - }; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) - { - struct sm4_instruction instr; -@@ -3920,12 +4255,11 @@ static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_ - instr.srcs[0].reg.mod = src_mod; - instr.src_count = 1; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, -- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, -- const struct hlsl_ir_node *src) -+static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+ const struct hlsl_ir_node *dst, unsigned dst_idx, const struct hlsl_ir_node *src) - { - struct sm4_instruction instr; - -@@ -3935,7 +4269,7 @@ static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffe - assert(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - assert(1 - dst_idx >= 0); -- instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; -+ instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; - instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; - instr.dsts[1 - dst_idx].reg.idx_count = 0; - instr.dst_count = 2; -@@ -3943,10 +4277,10 @@ static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffe - sm4_src_from_node(&instr.srcs[0], src, 
instr.dsts[dst_idx].writemask); - instr.src_count = 1; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) - { - struct sm4_instruction instr; -@@ -3961,11 +4295,11 @@ static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d - sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); - instr.src_count = 2; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - - /* dp# instructions don't map the swizzle. */ --static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) - { - struct sm4_instruction instr; -@@ -3980,10 +4314,10 @@ static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum v - sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, -+static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, - const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) - { -@@ -3995,7 +4329,7 @@ static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff - assert(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - assert(1 - dst_idx >= 0); -- 
instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; -+ instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; - instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; - instr.dsts[1 - dst_idx].reg.idx_count = 0; - instr.dst_count = 2; -@@ -4004,15 +4338,35 @@ static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff - sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); - instr.src_count = 2; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+ const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, -+ const struct hlsl_ir_node *src3) -+{ -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = opcode; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); -+ sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); -+ sm4_src_from_node(&instr.srcs[2], src3, instr.dsts[0].writemask); -+ instr.src_count = 3; -+ -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, -+static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, - enum hlsl_sampler_dim dim) - { -+ const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); - bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE - && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); - bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); 
-@@ -4029,7 +4383,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { -- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } -@@ -4052,7 +4406,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - - sm4_src_from_node(&instr.srcs[0], coords, coords_writemask); - -- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); - - instr.src_count = 2; - -@@ -4067,13 +4421,13 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - - memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); - instr.srcs[2].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -- reg->type = VKD3D_SM4_RT_IMMCONST; -+ reg->type = VKD3DSPR_IMMCONST; - reg->dim = VKD3D_SM4_DIMENSION_SCALAR; - reg->immconst_uint[0] = index->value.u[0].u; - } -- else if (ctx->profile->major_version == 4 && ctx->profile->minor_version == 0) -+ else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) - { -- hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); -+ hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); - } - else - { -@@ -4083,13 +4437,11 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - ++instr.src_count; - } - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_resource_load *load) -+static void write_sm4_sample(const 
struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) - { -- const struct hlsl_type *resource_type = load->resource.var->data_type; - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *coords = load->coords.node; - const struct hlsl_deref *resource = &load->resource; -@@ -4132,7 +4484,7 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { -- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } -@@ -4142,8 +4494,8 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); -- sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 3; - - if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD -@@ -4165,7 +4517,52 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - ++instr.src_count; - } - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_ir_node *dst = &load->node; -+ struct sm4_instruction instr; -+ -+ assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); -+ -+ memset(&instr, 0, sizeof(instr)); 
-+ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -+ if (dst->data_type->base_type == HLSL_TYPE_UINT) -+ instr.opcode |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], resource, instr.dsts[0].writemask); -+ instr.src_count = 1; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_ir_node *dst = &load->node; -+ struct sm4_instruction instr; -+ -+ assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_RESINFO; -+ if (dst->data_type->base_type == HLSL_TYPE_UINT) -+ instr.opcode |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); -+ instr.src_count = 2; -+ -+ write_sm4_instruction(tpf, &instr); - } - - static bool type_is_float(const struct hlsl_type *type) -@@ -4173,8 +4570,7 @@ static bool type_is_float(const struct hlsl_type *type) - return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; - } - --static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, -+static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, - const struct hlsl_ir_node *arg, uint32_t mask) - { - struct sm4_instruction instr; -@@ -4187,16 +4583,15 @@ static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, - - sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); - 
instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -- instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; -+ instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; - instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; - instr.srcs[1].reg.immconst_uint[0] = mask; - instr.src_count = 2; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_cast(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) -+static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) - { - static const union - { -@@ -4218,23 +4613,23 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_UINT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); -+ write_sm4_cast_from_bool(tpf, expr, arg1, one.u); - break; - - case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); - break; - - default: -@@ -4247,20 +4642,20 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, 
VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); -+ write_sm4_cast_from_bool(tpf, expr, arg1, 1); - break; - - case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); - break; - - default: -@@ -4273,20 +4668,20 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); -+ write_sm4_cast_from_bool(tpf, expr, arg1, 1); - break; - - case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); - break; - - default: -@@ -4295,7 +4690,7 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, - break; - - case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double."); - break; - - case HLSL_TYPE_BOOL: -@@ -4305,35 +4700,35 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, - } - } - --static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) -+static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, -+ const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) - { - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode 
= VKD3D_SM5_OP_STORE_UAV_TYPED; - -- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); -+ sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_expr(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) -+static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) - { - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_ir_node *arg2 = expr->operands[1].node; -+ const struct hlsl_ir_node *arg3 = expr->operands[2].node; - const struct hlsl_type *dst_type = expr->node.data_type; - struct vkd3d_string_buffer *dst_type_string; - - assert(expr->node.reg.allocated); - -- if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) -+ if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) - return; - - switch (expr->op) -@@ -4342,161 +4737,181 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP1_BIT_NOT: - assert(type_is_integer(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); 
- break; - - case HLSL_OP1_CAST: -- write_sm4_cast(ctx, buffer, expr); -+ write_sm4_cast(tpf, expr); - break; - - case HLSL_OP1_COS: - assert(type_is_float(dst_type)); -- write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); -+ write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); - break; - - case HLSL_OP1_DSX: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_DSX_COARSE: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_DSX_FINE: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_DSY_COARSE: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_DSY_FINE: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_EXP2: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FLOOR: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FRACT: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); -+ 
write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOG2: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOGIC_NOT: - assert(dst_type->base_type == HLSL_TYPE_BOOL); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_NEG: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP1_REINTERPRET: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_OP1_ROUND: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_RSQ: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); - break; - - case HLSL_OP1_SAT: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV - | 
(VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), - &expr->node, arg1, 0); - break; - - case HLSL_OP1_SIN: - assert(type_is_float(dst_type)); -- write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); -+ write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); - break; - - case HLSL_OP1_SQRT: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_TRUNC: - assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); - break; - - case HLSL_OP2_ADD: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_BIT_AND: - assert(type_is_integer(dst_type)); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_OR: - assert(type_is_integer(dst_type)); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_XOR: - assert(type_is_integer(dst_type)); -- write_sm4_binary_op(buffer, 
VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_DIV: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: -- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); -+ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); - } - break; - -@@ -4507,15 +4922,15 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (arg1->data_type->dimx) - { - case 4: -- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); -+ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); - break; - - case 3: -- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); -+ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); - break; - - case 2: -- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); -+ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); - break; - - case 1: -@@ -4525,7 +4940,7 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); - } - break; - -@@ -4538,18 +4953,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, 
VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", -- debug_hlsl_type(ctx, src_type)); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", -+ debug_hlsl_type(tpf->ctx, src_type)); - break; - } - break; -@@ -4564,21 +4979,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", -- debug_hlsl_type(ctx, src_type)); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", -+ debug_hlsl_type(tpf->ctx, src_type)); - break; - } - break; -@@ -4593,21 +5008,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); - break; - - case 
HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", -- debug_hlsl_type(ctx, src_type)); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", -+ debug_hlsl_type(tpf->ctx, src_type)); - break; - } - break; -@@ -4615,37 +5030,37 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - - case HLSL_OP2_LOGIC_AND: - assert(dst_type->base_type == HLSL_TYPE_BOOL); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LOGIC_OR: - assert(dst_type->base_type == HLSL_TYPE_BOOL); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LSHIFT: - assert(type_is_integer(dst_type)); - assert(dst_type->base_type != HLSL_TYPE_BOOL); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_MAX: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); -+ 
hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); - } - break; - -@@ -4653,19 +5068,19 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); - } - break; - -@@ -4673,11 +5088,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (dst_type->base_type) - { - case HLSL_TYPE_UINT: -- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); -+ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); - } - break; - -@@ -4685,18 +5100,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - /* Using IMUL instead of UMUL because we're taking the low - * bits, and the native compiler generates IMUL. 
*/ -- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); -+ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); - } - break; - -@@ -4709,18 +5124,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); - break; - - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", -- debug_hlsl_type(ctx, src_type)); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", -+ debug_hlsl_type(tpf->ctx, src_type)); - break; - } - break; -@@ -4729,18 +5144,22 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, - case HLSL_OP2_RSHIFT: - assert(type_is_integer(dst_type)); - assert(dst_type->base_type != HLSL_TYPE_BOOL); -- write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, -+ write_sm4_binary_op(tpf, dst_type->base_type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, - &expr->node, arg1, arg2); - break; - -+ case HLSL_OP3_MOVC: -+ write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); -+ break; -+ - default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); - } - -- hlsl_release_string_buffer(ctx, dst_type_string); -+ hlsl_release_string_buffer(tpf->ctx, dst_type_string); - } - --static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) -+static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) - { - struct sm4_instruction instr = - { -@@ -4751,26 +5170,25 @@ static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - assert(iff->condition.node->data_type->dimx == 1); - - sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - -- write_sm4_block(ctx, buffer, &iff->then_block); -+ write_sm4_block(tpf, &iff->then_block); - - if (!list_empty(&iff->else_block.instrs)) - { - instr.opcode = VKD3D_SM4_OP_ELSE; - instr.src_count = 0; -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - -- write_sm4_block(ctx, buffer, &iff->else_block); -+ write_sm4_block(tpf, &iff->else_block); - } - - instr.opcode = VKD3D_SM4_OP_ENDIF; - instr.src_count = 0; -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_jump(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) -+static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) - { - struct sm4_instruction instr = {0}; - -@@ -4780,19 +5198,13 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, - instr.opcode = VKD3D_SM4_OP_BREAK; - break; - -- 
case HLSL_IR_JUMP_DISCARD: -+ case HLSL_IR_JUMP_DISCARD_NZ: - { -- struct sm4_register *reg = &instr.srcs[0].reg; -- - instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; - - memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); -- instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - instr.src_count = 1; -- reg->type = VKD3D_SM4_RT_IMMCONST; -- reg->dim = VKD3D_SM4_DIMENSION_SCALAR; -- reg->immconst_uint[0] = ~0u; -- -+ sm4_src_from_node(&instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); - break; - } - -@@ -4800,11 +5212,11 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, - vkd3d_unreachable(); - - default: -- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); -+ hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); - return; - } - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - - /* Does this variable's data come directly from the API user, rather than being -@@ -4818,8 +5230,7 @@ static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *va - return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; - } - --static void write_sm4_load(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) -+static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) - { - const struct hlsl_type *type = load->node.data_type; - struct sm4_instruction instr; -@@ -4830,7 +5241,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, - instr.dst_count = 1; - - assert(type->class <= HLSL_CLASS_LAST_NUMERIC); -- if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(ctx, load->src.var)) -+ if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) - { - struct hlsl_constant_value value; - -@@ -4839,7 +5250,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, - - instr.opcode = VKD3D_SM4_OP_MOVC; - -- 
sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); - - memset(&value, 0xff, sizeof(value)); - sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); -@@ -4851,33 +5262,31 @@ static void write_sm4_load(struct hlsl_ctx *ctx, - { - instr.opcode = VKD3D_SM4_OP_MOV; - -- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); - instr.src_count = 1; - } - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_loop(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) -+static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) - { - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_LOOP, - }; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - -- write_sm4_block(ctx, buffer, &loop->body); -+ write_sm4_block(tpf, &loop->body); - - instr.opcode = VKD3D_SM4_OP_ENDLOOP; -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, -- const struct hlsl_deref *resource, const struct hlsl_deref *sampler, -- const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) -+static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, -+ const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords, -+ unsigned int swizzle, const struct hlsl_ir_node *texel_offset) - { - struct sm4_src_register *src; - struct sm4_instruction instr; -@@ -4895,9 +5304,9 @@ static 
void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { -- if (ctx->profile->major_version < 5) -+ if (tpf->ctx->profile->major_version < 5) - { -- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); - return; - } -@@ -4906,58 +5315,39 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - } - } - -- sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[instr.src_count++], resource, instr.dsts[0].writemask); - - src = &instr.srcs[instr.src_count++]; -- sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(tpf->ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); - src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; - src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; - src->swizzle = swizzle; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_resource_load(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) -+static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) - { -- const struct hlsl_type *resource_type = load->resource.var->data_type; - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *sample_index = load->sample_index.node; - const struct hlsl_ir_node *coords = load->coords.node; - -- if (!hlsl_type_is_resource(resource_type)) -+ if (load->sampler.var && !load->sampler.var->is_uniform) - { -- hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); -+ hlsl_fixme(tpf->ctx, &load->node.loc, 
"Sample using non-uniform sampler variable."); - return; - } - -- if (load->sampler.var) -- { -- const struct hlsl_type *sampler_type = load->sampler.var->data_type; -- -- if (!hlsl_type_is_resource(sampler_type)) -- { -- hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); -- return; -- } -- -- if (!load->sampler.var->is_uniform) -- { -- hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); -- return; -- } -- } -- - if (!load->resource.var->is_uniform) - { -- hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); -+ hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable."); - return; - } - - switch (load->load_type) - { - case HLSL_RESOURCE_LOAD: -- write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, -+ write_sm4_ld(tpf, &load->node, &load->resource, - coords, sample_index, texel_offset, load->sampling_dim); - break; - -@@ -4967,64 +5357,61 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, - case HLSL_RESOURCE_SAMPLE_LOD: - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - case HLSL_RESOURCE_SAMPLE_GRAD: -- if (!load->sampler.var) -- { -- hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); -- return; -- } -- write_sm4_sample(ctx, buffer, load); -+ /* Combined sample expressions were lowered. 
*/ -+ assert(load->sampler.var); -+ write_sm4_sample(tpf, load); - break; - - case HLSL_RESOURCE_GATHER_RED: -- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -- &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); -+ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, -+ HLSL_SWIZZLE(X, X, X, X), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_GREEN: -- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -- &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); -+ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, -+ HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_BLUE: -- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -- &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); -+ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, -+ HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_ALPHA: -- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -- &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); -+ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, -+ HLSL_SWIZZLE(W, W, W, W), texel_offset); -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_INFO: -+ write_sm4_sampleinfo(tpf, load); -+ break; -+ -+ case HLSL_RESOURCE_RESINFO: -+ write_sm4_resinfo(tpf, load); - break; - } - } - --static void write_sm4_resource_store(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) -+static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) - { -- const struct hlsl_type *resource_type = store->resource.var->data_type; -- -- if (!hlsl_type_is_resource(resource_type)) -- { -- hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); -- return; -- } -+ 
struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); - - if (!store->resource.var->is_uniform) - { -- hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); -+ hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); - return; - } - - if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { -- hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.\n"); -+ hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); - return; - } - -- write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); -+ write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); - } - --static void write_sm4_store(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) -+static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) - { - const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm4_instruction instr; -@@ -5033,18 +5420,17 @@ static void write_sm4_store(struct hlsl_ctx *ctx, - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - -- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); -+ sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); - instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); - instr.src_count = 1; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_swizzle(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) -+static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) - { - struct sm4_instruction instr; - unsigned 
int writemask; -@@ -5060,11 +5446,10 @@ static void write_sm4_swizzle(struct hlsl_ctx *ctx, - swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); - instr.src_count = 1; - -- write_sm4_instruction(buffer, &instr); -+ write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_block *block) -+static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) - { - const struct hlsl_ir_node *instr; - -@@ -5074,12 +5459,12 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - { - if (instr->data_type->class == HLSL_CLASS_MATRIX) - { -- hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); -+ hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); - break; - } - else if (instr->data_type->class == HLSL_CLASS_OBJECT) - { -- hlsl_fixme(ctx, &instr->loc, "Object copy."); -+ hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); - break; - } - -@@ -5099,43 +5484,43 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - vkd3d_unreachable(); - - case HLSL_IR_EXPR: -- write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); -+ write_sm4_expr(tpf, hlsl_ir_expr(instr)); - break; - - case HLSL_IR_IF: -- write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); -+ write_sm4_if(tpf, hlsl_ir_if(instr)); - break; - - case HLSL_IR_JUMP: -- write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); -+ write_sm4_jump(tpf, hlsl_ir_jump(instr)); - break; - - case HLSL_IR_LOAD: -- write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); -+ write_sm4_load(tpf, hlsl_ir_load(instr)); - break; - - case HLSL_IR_RESOURCE_LOAD: -- write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); -+ write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); - break; - - case HLSL_IR_RESOURCE_STORE: -- write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); -+ 
write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); - break; - - case HLSL_IR_LOOP: -- write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); -+ write_sm4_loop(tpf, hlsl_ir_loop(instr)); - break; - - case HLSL_IR_STORE: -- write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); -+ write_sm4_store(tpf, hlsl_ir_store(instr)); - break; - - case HLSL_IR_SWIZZLE: -- write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); -+ write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); - break; - - default: -- hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); -+ hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } - } -@@ -5144,12 +5529,13 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) - { - const struct hlsl_profile_info *profile = ctx->profile; -- const struct hlsl_ir_var **extern_resources; - struct vkd3d_bytecode_buffer buffer = {0}; -+ struct extern_resource *extern_resources; - unsigned int extern_resources_count, i; - const struct hlsl_buffer *cbuffer; - const struct hlsl_ir_var *var; - size_t token_count_position; -+ struct tpf_writer tpf; - - static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = - { -@@ -5164,6 +5550,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - VKD3D_SM4_LIB, - }; - -+ tpf_writer_init(&tpf, ctx, &buffer); -+ - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - - put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); -@@ -5172,45 +5560,42 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) -- write_sm4_dcl_constant_buffer(&buffer, cbuffer); -+ write_sm4_dcl_constant_buffer(&tpf, cbuffer); - } - - for (i = 0; i < extern_resources_count; ++i) - { -- enum hlsl_regset regset; 
-- -- var = extern_resources[i]; -- regset = hlsl_type_get_regset(var->data_type); -+ const struct extern_resource *resource = &extern_resources[i]; - -- if (regset == HLSL_REGSET_SAMPLERS) -- write_sm4_dcl_samplers(&buffer, var); -- else if (regset == HLSL_REGSET_TEXTURES) -- write_sm4_dcl_textures(ctx, &buffer, var, false); -- else if (regset == HLSL_REGSET_UAVS) -- write_sm4_dcl_textures(ctx, &buffer, var, true); -+ if (resource->regset == HLSL_REGSET_SAMPLERS) -+ write_sm4_dcl_samplers(&tpf, resource); -+ else if (resource->regset == HLSL_REGSET_TEXTURES) -+ write_sm4_dcl_textures(&tpf, resource, false); -+ else if (resource->regset == HLSL_REGSET_UAVS) -+ write_sm4_dcl_textures(&tpf, resource, true); - } - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) -- write_sm4_dcl_semantic(ctx, &buffer, var); -+ write_sm4_dcl_semantic(&tpf, var); - } - - if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) -- write_sm4_dcl_thread_group(&buffer, ctx->thread_count); -+ write_sm4_dcl_thread_group(&tpf, ctx->thread_count); - - if (ctx->temp_count) -- write_sm4_dcl_temps(&buffer, ctx->temp_count); -+ write_sm4_dcl_temps(&tpf, ctx->temp_count); - -- write_sm4_block(ctx, &buffer, &entry_func->body); -+ write_sm4_block(&tpf, &entry_func->body); - -- write_sm4_ret(&buffer); -+ write_sm4_ret(&tpf); - - set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - -- add_section(dxbc, TAG_SHDR, &buffer); -+ add_section(ctx, dxbc, TAG_SHDR, &buffer); - -- vkd3d_free(extern_resources); -+ sm4_free_extern_resources(extern_resources, extern_resources_count); - } - - int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 343fdb2252e..0245d83a10b 100644 
---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -415,6 +415,8 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t - return "hlsl"; - case VKD3D_SHADER_SOURCE_D3D_BYTECODE: - return "d3dbc"; -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ return "dxil"; - default: - FIXME("Unhandled source type %#x.\n", type); - return "bin"; -@@ -440,6 +442,18 @@ void vkd3d_shader_dump_shader(enum vkd3d_shader_source_type source_type, - shader_get_source_type_suffix(source_type), shader->code, shader->size); - } - -+static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) -+{ -+ struct vkd3d_shader_scan_signature_info *signature_info; -+ -+ if ((signature_info = vkd3d_find_struct(info->next, SCAN_SIGNATURE_INFO))) -+ { -+ memset(&signature_info->input, 0, sizeof(signature_info->input)); -+ memset(&signature_info->output, 0, sizeof(signature_info->output)); -+ memset(&signature_info->patch_constant, 0, sizeof(signature_info->patch_constant)); -+ } -+} -+ - bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, - struct vkd3d_shader_message_context *message_context, const char *source_name, - const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, -@@ -526,9 +540,46 @@ void vkd3d_shader_free_messages(char *messages) - vkd3d_free(messages); - } - -+static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, -+ const struct shader_signature *src) -+{ -+ unsigned int i; -+ -+ signature->element_count = src->element_count; -+ if (!src->elements) -+ { -+ assert(!signature->element_count); -+ signature->elements = NULL; -+ return true; -+ } -+ -+ if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) -+ return false; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ struct vkd3d_shader_signature_element *d = &signature->elements[i]; -+ 
struct signature_element *e = &src->elements[i]; -+ -+ d->semantic_name = e->semantic_name; -+ d->semantic_index = e->semantic_index; -+ d->stream_index = e->stream_index; -+ d->sysval_semantic = e->sysval_semantic; -+ d->component_type = e->component_type; -+ d->register_index = e->register_index; -+ if (e->register_count > 1) -+ FIXME("Arrayed elements are not supported yet.\n"); -+ d->mask = e->mask; -+ d->used_mask = e->used_mask; -+ d->min_precision = e->min_precision; -+ } -+ -+ return true; -+} -+ - struct vkd3d_shader_scan_context - { -- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -+ struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; - size_t descriptors_size; - - struct vkd3d_shader_message_context *message_context; -@@ -548,20 +599,12 @@ struct vkd3d_shader_scan_context - size_t cf_info_size; - size_t cf_info_count; - -- struct -- { -- unsigned int id; -- unsigned int descriptor_idx; -- } *uav_ranges; -- size_t uav_ranges_size; -- size_t uav_range_count; -- - enum vkd3d_shader_api_version api_version; - }; - - static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, -+ struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context) - { - unsigned int i; -@@ -584,7 +627,6 @@ static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *con - - static void vkd3d_shader_scan_context_cleanup(struct vkd3d_shader_scan_context *context) - { -- vkd3d_free(context->uav_ranges); - vkd3d_free(context->cf_info); - } - -@@ -652,18 +694,24 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_find_innermost_loop_cf_inf - return NULL; - } - --static struct vkd3d_shader_descriptor_info *vkd3d_shader_scan_get_uav_descriptor_info( -- const struct vkd3d_shader_scan_context *context, unsigned int range_id) -+static 
void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_context *context, -+ const struct vkd3d_shader_register *reg, uint32_t flag) - { -+ unsigned int range_id = reg->idx[0].offset; - unsigned int i; - -- for (i = 0; i < context->uav_range_count; ++i) -+ if (!context->scan_descriptor_info) -+ return; -+ -+ for (i = 0; i < context->scan_descriptor_info->descriptor_count; ++i) - { -- if (context->uav_ranges[i].id == range_id) -- return &context->scan_descriptor_info->descriptors[context->uav_ranges[i].descriptor_idx]; -+ if (context->scan_descriptor_info->descriptors[i].type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV -+ && context->scan_descriptor_info->descriptors[i].register_id == range_id) -+ { -+ context->scan_descriptor_info->descriptors[i].flags |= flag; -+ break; -+ } - } -- -- return NULL; - } - - static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) -@@ -679,13 +727,7 @@ static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instr - static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg) - { -- struct vkd3d_shader_descriptor_info *d; -- -- if (!context->scan_descriptor_info) -- return; -- -- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); -- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ; -+ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); - } - - static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) -@@ -698,13 +740,7 @@ static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_in - static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg) - { -- struct vkd3d_shader_descriptor_info *d; -- -- if (!context->scan_descriptor_info) -- return; -- -- d = 
vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); -- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER; -+ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER); - } - - static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) -@@ -717,93 +753,76 @@ static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_ - static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg) - { -- struct vkd3d_shader_descriptor_info *d; -- -- if (!context->scan_descriptor_info) -- return; -- -- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); -- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS; -+ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS); - } - --static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, -- enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register_range *range, -- enum vkd3d_shader_resource_type resource_type, enum vkd3d_shader_resource_data_type resource_data_type, -- unsigned int flags) -+static struct vkd3d_shader_descriptor_info1 *vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, -+ enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register *reg, -+ const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, -+ enum vkd3d_shader_resource_data_type resource_data_type) - { -- struct vkd3d_shader_scan_descriptor_info *info = context->scan_descriptor_info; -- struct vkd3d_shader_descriptor_info *d; -+ struct vkd3d_shader_scan_descriptor_info1 *info = context->scan_descriptor_info; -+ struct vkd3d_shader_descriptor_info1 *d; - - if (!vkd3d_array_reserve((void **)&info->descriptors, &context->descriptors_size, - info->descriptor_count + 1, 
sizeof(*info->descriptors))) - { - ERR("Failed to allocate descriptor info.\n"); -- return false; -+ return NULL; - } - - d = &info->descriptors[info->descriptor_count]; -+ memset(d, 0, sizeof(*d)); - d->type = type; -+ d->register_id = reg->idx[0].offset; - d->register_space = range->space; - d->register_index = range->first; - d->resource_type = resource_type; - d->resource_data_type = resource_data_type; -- d->flags = flags; - d->count = (range->last == ~0u) ? ~0u : range->last - range->first + 1; - ++info->descriptor_count; - -- return true; --} -- --static bool vkd3d_shader_scan_add_uav_range(struct vkd3d_shader_scan_context *context, -- unsigned int id, unsigned int descriptor_idx) --{ -- if (!vkd3d_array_reserve((void **)&context->uav_ranges, &context->uav_ranges_size, -- context->uav_range_count + 1, sizeof(*context->uav_ranges))) -- { -- ERR("Failed to allocate UAV range.\n"); -- return false; -- } -- -- context->uav_ranges[context->uav_range_count].id = id; -- context->uav_ranges[context->uav_range_count].descriptor_idx = descriptor_idx; -- ++context->uav_range_count; -- -- return true; -+ return d; - } - - static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_instruction *instruction) - { - const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; -+ struct vkd3d_shader_descriptor_info1 *d; - - if (!context->scan_descriptor_info) - return; - -- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->range, -- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); -+ if (!(d = vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, -+ &cb->src.reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) -+ return; -+ d->buffer_size = cb->size * 16; - } - - static void vkd3d_shader_scan_sampler_declaration(struct vkd3d_shader_scan_context *context, - const struct 
vkd3d_shader_instruction *instruction) - { - const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; -- unsigned int flags; -+ struct vkd3d_shader_descriptor_info1 *d; - - if (!context->scan_descriptor_info) - return; - -+ if (!(d = vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, -+ &sampler->src.reg, &sampler->range, VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT))) -+ return; -+ - if (instruction->flags & VKD3DSI_SAMPLER_COMPARISON_MODE) -- flags = VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; -- else -- flags = 0; -- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->range, -- VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT, flags); -+ d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; - } - - static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, -- enum vkd3d_shader_resource_data_type resource_data_type) -+ enum vkd3d_shader_resource_data_type resource_data_type, -+ unsigned int sample_count, unsigned int structure_stride, bool raw) - { -+ struct vkd3d_shader_descriptor_info1 *d; - enum vkd3d_shader_descriptor_type type; - - if (!context->scan_descriptor_info) -@@ -813,10 +832,13 @@ static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_cont - type = VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; - else - type = VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; -- vkd3d_shader_scan_add_descriptor(context, type, &resource->range, resource_type, resource_data_type, 0); -- if (type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -- vkd3d_shader_scan_add_uav_range(context, resource->reg.reg.idx[0].offset, -- context->scan_descriptor_info->descriptor_count - 1); -+ if (!(d = vkd3d_shader_scan_add_descriptor(context, type, &resource->reg.reg, -+ &resource->range, resource_type, resource_data_type))) -+ 
return; -+ d->sample_count = sample_count; -+ d->structure_stride = structure_stride; -+ if (raw) -+ d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER; - } - - static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_scan_context *context, -@@ -875,7 +897,7 @@ static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_sca - } - - vkd3d_shader_scan_resource_declaration(context, &semantic->resource, -- semantic->resource_type, resource_data_type); -+ semantic->resource_type, resource_data_type, semantic->sample_count, 0, false); - } - - static void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, -@@ -909,12 +931,13 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte - case VKD3DSIH_DCL_RESOURCE_RAW: - case VKD3DSIH_DCL_UAV_RAW: - vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.raw_resource.resource, -- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); -+ VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, 0, true); - break; - case VKD3DSIH_DCL_RESOURCE_STRUCTURED: - case VKD3DSIH_DCL_UAV_STRUCTURED: - vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.structured_resource.resource, -- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); -+ VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, -+ instruction->declaration.structured_resource.byte_stride, false); - break; - case VKD3DSIH_IF: - cf_info = vkd3d_shader_scan_push_cf_info(context); -@@ -1066,22 +1089,64 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte - return VKD3D_OK; - } - -+static enum vkd3d_result convert_descriptor_info(struct vkd3d_shader_scan_descriptor_info *info, -+ const struct vkd3d_shader_scan_descriptor_info1 *info1) -+{ -+ unsigned int i; -+ -+ if (!(info->descriptors = vkd3d_calloc(info1->descriptor_count, sizeof(*info->descriptors)))) -+ return 
VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ for (i = 0; i < info1->descriptor_count; ++i) -+ { -+ const struct vkd3d_shader_descriptor_info1 *src = &info1->descriptors[i]; -+ struct vkd3d_shader_descriptor_info *dst = &info->descriptors[i]; -+ -+ dst->type = src->type; -+ dst->register_space = src->register_space; -+ dst->register_index = src->register_index; -+ dst->resource_type = src->resource_type; -+ dst->resource_data_type = src->resource_data_type; -+ dst->flags = src->flags; -+ dst->count = src->count; -+ } -+ info->descriptor_count = info1->descriptor_count; -+ -+ return VKD3D_OK; -+} -+ -+static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info) -+{ -+ TRACE("scan_descriptor_info %p.\n", scan_descriptor_info); -+ -+ vkd3d_free(scan_descriptor_info->descriptors); -+} -+ - static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser *parser) -+ struct vkd3d_shader_message_context *message_context, -+ struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) - { -- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -+ struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; -+ struct vkd3d_shader_scan_descriptor_info *descriptor_info; -+ struct vkd3d_shader_scan_signature_info *signature_info; - struct vkd3d_shader_instruction *instruction; - struct vkd3d_shader_scan_context context; - int ret = VKD3D_OK; - unsigned int i; - -- if ((scan_descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO))) -+ descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO); -+ if (descriptor_info1) - { -- scan_descriptor_info->descriptors = NULL; -- scan_descriptor_info->descriptor_count = 0; -+ descriptor_info1->descriptors = NULL; -+ descriptor_info1->descriptor_count = 0; - } -+ else if (descriptor_info) -+ { -+ 
descriptor_info1 = &local_descriptor_info1; -+ } -+ signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO); - -- vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); -+ vkd3d_shader_scan_context_init(&context, compile_info, descriptor_info1, message_context); - - if (TRACE_ON()) - { -@@ -1092,13 +1157,52 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info - { - instruction = &parser->instructions.elements[i]; - if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) -- { -- if (scan_descriptor_info) -- vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); - break; -+ } -+ -+ for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) -+ { -+ unsigned int size = parser->shader_desc.flat_constant_count[i].external; -+ struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; -+ struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; -+ struct vkd3d_shader_descriptor_info1 *d; -+ -+ if (parser->shader_desc.flat_constant_count[i].external) -+ { -+ if ((d = vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ®, -+ &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) -+ d->buffer_size = size * 16; - } - } - -+ if (!ret && signature_info) -+ { -+ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) -+ || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, -+ &parser->shader_desc.output_signature) -+ || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, -+ &parser->shader_desc.patch_constant_signature)) -+ { -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ } -+ -+ if (!ret && descriptor_info) -+ ret = convert_descriptor_info(descriptor_info, descriptor_info1); -+ -+ if (ret < 0) -+ { -+ if (descriptor_info) -+ 
vkd3d_shader_free_scan_descriptor_info(descriptor_info); -+ if (descriptor_info1) -+ vkd3d_shader_free_scan_descriptor_info1(descriptor_info1); -+ if (signature_info) -+ vkd3d_shader_free_scan_signature_info(signature_info); -+ } -+ else -+ { -+ vkd3d_shader_free_scan_descriptor_info1(&local_descriptor_info1); -+ } - vkd3d_shader_scan_context_cleanup(&context); - return ret; - } -@@ -1115,7 +1219,7 @@ static int scan_dxbc(const struct vkd3d_shader_compile_info *compile_info, - return ret; - } - -- ret = scan_with_parser(compile_info, message_context, parser); -+ ret = scan_with_parser(compile_info, message_context, NULL, parser); - vkd3d_shader_parser_destroy(parser); - - return ret; -@@ -1133,7 +1237,25 @@ static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, - return ret; - } - -- ret = scan_with_parser(compile_info, message_context, parser); -+ ret = scan_with_parser(compile_info, message_context, NULL, parser); -+ vkd3d_shader_parser_destroy(parser); -+ -+ return ret; -+} -+ -+static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_shader_parser *parser; -+ int ret; -+ -+ if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) -+ { -+ WARN("Failed to initialise shader parser.\n"); -+ return ret; -+ } -+ -+ ret = scan_with_parser(compile_info, message_context, NULL, parser); - vkd3d_shader_parser_destroy(parser); - - return ret; -@@ -1152,6 +1274,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - if ((ret = vkd3d_shader_validate_compile_info(compile_info, false)) < 0) - return ret; - -+ init_scan_signature_info(compile_info); -+ - vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - - switch (compile_info->source_type) -@@ -1169,6 +1293,10 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - ret = 
scan_d3dbc(compile_info, &message_context); - break; - -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = scan_dxil(compile_info, &message_context); -+ break; -+ - default: - ERR("Unsupported source type %#x.\n", compile_info->source_type); - ret = VKD3D_ERROR_INVALID_ARGUMENT; -@@ -1186,7 +1314,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { -- struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; -+ struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; - struct vkd3d_glsl_generator *glsl_generator; - struct vkd3d_shader_compile_info scan_info; - int ret; -@@ -1194,11 +1322,8 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); - - scan_info = *compile_info; -- scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; -- scan_descriptor_info.next = scan_info.next; -- scan_info.next = &scan_descriptor_info; - -- if ((ret = scan_with_parser(&scan_info, message_context, parser)) < 0) -+ if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) - return ret; - - switch (compile_info->target_type) -@@ -1212,7 +1337,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - message_context, &parser->location))) - { - ERR("Failed to create GLSL generator.\n"); -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -+ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - return VKD3D_ERROR; - } - -@@ -1230,7 +1355,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, - assert(0); - } - -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -+ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - return 
ret; - } - -@@ -1291,6 +1416,24 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ - return VKD3D_ERROR; - } - -+static int compile_dxbc_dxil(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_shader_parser *parser; -+ int ret; -+ -+ if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) -+ { -+ WARN("Failed to initialise shader parser.\n"); -+ return ret; -+ } -+ -+ ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); -+ -+ vkd3d_shader_parser_destroy(parser); -+ return ret; -+} -+ - int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, char **messages) - { -@@ -1305,6 +1448,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - if ((ret = vkd3d_shader_validate_compile_info(compile_info, true)) < 0) - return ret; - -+ init_scan_signature_info(compile_info); -+ - vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - - switch (compile_info->source_type) -@@ -1321,6 +1466,10 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - ret = compile_d3d_bytecode(compile_info, out, &message_context); - break; - -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = compile_dxbc_dxil(compile_info, out, &message_context); -+ break; -+ - default: - vkd3d_unreachable(); - } -@@ -1339,6 +1488,15 @@ void vkd3d_shader_free_scan_descriptor_info(struct vkd3d_shader_scan_descriptor_ - vkd3d_free(scan_descriptor_info->descriptors); - } - -+void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info) -+{ -+ TRACE("info %p.\n", info); -+ -+ vkd3d_shader_free_shader_signature(&info->input); -+ vkd3d_shader_free_shader_signature(&info->output); -+ vkd3d_shader_free_shader_signature(&info->patch_constant); -+} -+ - void 
vkd3d_shader_free_shader_code(struct vkd3d_shader_code *shader_code) - { - TRACE("shader_code %p.\n", shader_code); -@@ -1401,43 +1559,6 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu - desc->version = 0; - } - --static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, -- const struct shader_signature *src) --{ -- unsigned int i; -- -- signature->element_count = src->element_count; -- if (!src->elements) -- { -- assert(!signature->element_count); -- signature->elements = NULL; -- return true; -- } -- -- if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) -- return false; -- -- for (i = 0; i < signature->element_count; ++i) -- { -- struct vkd3d_shader_signature_element *d = &signature->elements[i]; -- struct signature_element *e = &src->elements[i]; -- -- d->semantic_name = e->semantic_name; -- d->semantic_index = e->semantic_index; -- d->stream_index = e->stream_index; -- d->sysval_semantic = e->sysval_semantic; -- d->component_type = e->component_type; -- d->register_index = e->register_index; -- if (e->register_count > 1) -- FIXME("Arrayed elements are not supported yet.\n"); -- d->mask = e->mask; -- d->used_mask = e->used_mask; -- d->min_precision = e->min_precision; -- } -- -- return true; --} -- - void shader_signature_cleanup(struct shader_signature *signature) - { - vkd3d_free(signature->elements); -@@ -1526,6 +1647,9 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns - VKD3D_SHADER_SOURCE_DXBC_TPF, - VKD3D_SHADER_SOURCE_HLSL, - VKD3D_SHADER_SOURCE_D3D_BYTECODE, -+#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL -+ VKD3D_SHADER_SOURCE_DXBC_DXIL, -+#endif - }; - - TRACE("count %p.\n", count); -@@ -1564,6 +1688,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - - switch (source_type) - { -+#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+#endif - case 
VKD3D_SHADER_SOURCE_DXBC_TPF: - *count = ARRAY_SIZE(dxbc_tpf_types); - return dxbc_tpf_types; -@@ -1649,9 +1776,6 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, - { - void *params; - -- if (!count) -- return NULL; -- - if (count > allocator->count - allocator->index) - { - struct vkd3d_shader_param_node *next = shader_param_allocator_node_create(allocator); -@@ -1792,3 +1916,41 @@ void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *ins - vkd3d_free(instructions->icbs[i]); - vkd3d_free(instructions->icbs); - } -+ -+void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, -+ const struct vkd3d_shader_signature *input_signature, -+ unsigned int *ret_count, struct vkd3d_shader_varying_map *varyings) -+{ -+ unsigned int count = 0; -+ unsigned int i; -+ -+ TRACE("output_signature %p, input_signature %p, ret_count %p, varyings %p.\n", -+ output_signature, input_signature, ret_count, varyings); -+ -+ for (i = 0; i < input_signature->element_count; ++i) -+ { -+ const struct vkd3d_shader_signature_element *input_element, *output_element; -+ -+ input_element = &input_signature->elements[i]; -+ -+ if (input_element->sysval_semantic != VKD3D_SHADER_SV_NONE) -+ continue; -+ -+ varyings[count].input_register_index = input_element->register_index; -+ varyings[count].input_mask = input_element->mask; -+ -+ if ((output_element = vkd3d_shader_find_signature_element(output_signature, -+ input_element->semantic_name, input_element->semantic_index, 0))) -+ { -+ varyings[count].output_signature_index = output_element - output_signature->elements; -+ } -+ else -+ { -+ varyings[count].output_signature_index = output_signature->element_count; -+ } -+ -+ ++count; -+ } -+ -+ *ret_count = count; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 406d53a3391..eab1c730ae9 100644 ---- 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -78,9 +78,14 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS = 1004, - VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER = 1005, - VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL = 1006, -+ VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE = 1007, -+ VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION = 1008, -+ VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE = 1009, - - VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, - VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301, -+ VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK = 1302, -+ VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_SWIZZLE = 1303, - - VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND = 2000, - VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE = 2001, -@@ -88,6 +93,8 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_IDX_UNSUPPORTED = 2003, - VKD3D_SHADER_ERROR_SPV_STENCIL_EXPORT_UNSUPPORTED = 2004, - -+ VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE = 2300, -+ - VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY = 3000, - VKD3D_SHADER_ERROR_RS_INVALID_VERSION = 3001, - VKD3D_SHADER_ERROR_RS_INVALID_ROOT_PARAMETER_TYPE = 3002, -@@ -133,10 +140,14 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, - VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, - VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, -+ VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027, - - VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, - VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, - VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE = 5302, -+ VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, -+ VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, -+ VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305, - - VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000, - -@@ -145,8 +156,33 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE = 7002, - 
VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE = 7003, - VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, -+ VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, -+ VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, - - VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, -+ -+ VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY = 8000, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE = 8001, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET = 8002, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE = 8003, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE = 8004, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT = 8005, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE = 8006, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB = 8007, -+ VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT = 8008, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL = 8009, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID = 8010, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE = 8011, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND = 8012, -+ -+ VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER = 8300, -+ VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE = 8301, -+ VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH = 8302, -+ VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH = 8303, -+ VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS = 8304, -+ VKD3D_SHADER_WARNING_DXIL_UNHANDLED_INTRINSIC = 8305, -+ -+ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, - }; - - enum vkd3d_shader_opcode -@@ -486,6 +522,9 @@ enum vkd3d_shader_register_type - VKD3DSPR_DEPTHOUTLE, - VKD3DSPR_RASTERIZER, - VKD3DSPR_OUTSTENCILREF, -+ VKD3DSPR_UNDEF, -+ -+ VKD3DSPR_COUNT, - - VKD3DSPR_INVALID = ~0u, - }; -@@ -516,8 +555,14 @@ enum vkd3d_data_type - VKD3D_DATA_DOUBLE, - VKD3D_DATA_CONTINUED, - VKD3D_DATA_UNUSED, -+ VKD3D_DATA_UINT8, - }; - -+static inline bool data_type_is_integer(enum vkd3d_data_type data_type) -+{ -+ return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT; -+} -+ - enum vkd3d_immconst_type - { - VKD3D_IMMCONST_SCALAR, 
-@@ -696,6 +741,9 @@ struct vkd3d_shader_register - } u; - }; - -+void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, -+ enum vkd3d_data_type data_type, unsigned int idx_count); -+ - struct vkd3d_shader_dst_param - { - struct vkd3d_shader_register reg; -@@ -784,6 +832,8 @@ enum vkd3d_shader_input_sysval_semantic - VKD3D_SIV_LINE_DENSITY_TESS_FACTOR = 22, - }; - -+#define SIGNATURE_TARGET_LOCATION_UNUSED (~0u) -+ - struct signature_element - { - unsigned int sort_index; -@@ -792,16 +842,21 @@ struct signature_element - unsigned int stream_index; - enum vkd3d_shader_sysval_semantic sysval_semantic; - enum vkd3d_shader_component_type component_type; -+ /* Register index in the source shader. */ - unsigned int register_index; - unsigned int register_count; - unsigned int mask; - unsigned int used_mask; - enum vkd3d_shader_minimum_precision min_precision; -+ /* Register index / location in the target shader. -+ * If SIGNATURE_TARGET_LOCATION_UNUSED, this element should not be written. 
*/ -+ unsigned int target_location; - }; - - struct shader_signature - { - struct signature_element *elements; -+ size_t elements_capacity; - unsigned int element_count; - }; - -@@ -811,9 +866,17 @@ struct vkd3d_shader_desc - { - const uint32_t *byte_code; - size_t byte_code_size; -+ bool is_dxil; - struct shader_signature input_signature; - struct shader_signature output_signature; - struct shader_signature patch_constant_signature; -+ -+ uint32_t temp_count; -+ -+ struct -+ { -+ uint32_t used, external; -+ } flat_constant_count[3]; - }; - - struct vkd3d_shader_register_semantic -@@ -945,6 +1008,8 @@ struct vkd3d_shader_instruction - } declaration; - }; - -+void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx); -+ - static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) - { - return ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w; -@@ -965,6 +1030,11 @@ static inline bool vkd3d_shader_register_is_patch_constant(const struct vkd3d_sh - return reg->type == VKD3DSPR_PATCHCONST; - } - -+static inline bool register_is_constant(const struct vkd3d_shader_register *reg) -+{ -+ return (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_IMMCONST64); -+} -+ - struct vkd3d_shader_location - { - const char *source_name; -@@ -1066,6 +1136,27 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse - parser->ops->parser_destroy(parser); - } - -+struct vkd3d_shader_descriptor_info1 -+{ -+ enum vkd3d_shader_descriptor_type type; -+ unsigned int register_space; -+ unsigned int register_index; -+ unsigned int register_id; -+ enum vkd3d_shader_resource_type resource_type; -+ enum vkd3d_shader_resource_data_type resource_data_type; -+ unsigned int flags; -+ unsigned int sample_count; -+ unsigned int buffer_size; -+ unsigned int structure_stride; -+ unsigned int count; -+}; -+ -+struct vkd3d_shader_scan_descriptor_info1 -+{ -+ struct 
vkd3d_shader_descriptor_info1 *descriptors; -+ unsigned int descriptor_count; -+}; -+ - void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, - const struct vkd3d_shader_version *shader_version); - -@@ -1167,6 +1258,8 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); - int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); -+int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); - - void free_shader_desc(struct vkd3d_shader_desc *desc); - -@@ -1186,7 +1279,7 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); - #define SPIRV_MAX_SRC_COUNT 6 - - int spirv_compile(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, -+ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - -@@ -1240,6 +1333,30 @@ static inline enum vkd3d_data_type vkd3d_data_type_from_component_type( - } - } - -+static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resource_data_type( -+ enum vkd3d_shader_resource_data_type data_type) -+{ -+ switch (data_type) -+ { -+ case VKD3D_SHADER_RESOURCE_DATA_FLOAT: -+ case VKD3D_SHADER_RESOURCE_DATA_UNORM: -+ case VKD3D_SHADER_RESOURCE_DATA_SNORM: -+ return VKD3D_SHADER_COMPONENT_FLOAT; -+ case VKD3D_SHADER_RESOURCE_DATA_UINT: -+ return VKD3D_SHADER_COMPONENT_UINT; -+ case VKD3D_SHADER_RESOURCE_DATA_INT: -+ return VKD3D_SHADER_COMPONENT_INT; -+ case VKD3D_SHADER_RESOURCE_DATA_DOUBLE: -+ case 
VKD3D_SHADER_RESOURCE_DATA_CONTINUED: -+ return VKD3D_SHADER_COMPONENT_DOUBLE; -+ default: -+ FIXME("Unhandled data type %#x.\n", data_type); -+ /* fall-through */ -+ case VKD3D_SHADER_RESOURCE_DATA_MIXED: -+ return VKD3D_SHADER_COMPONENT_UINT; -+ } -+} -+ - enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, - unsigned int index); - -@@ -1339,6 +1456,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, - } - - #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) -+#define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) - - #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') - #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') -@@ -1369,11 +1487,7 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void - void dxbc_writer_init(struct dxbc_writer *dxbc); - int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); - --enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *instructions); --enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( -- struct vkd3d_shader_instruction_array *instructions, const struct shader_signature *input_signature); --enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, -- enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, -- struct shader_signature *output_signature, struct shader_signature *patch_constant_signature); -+enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_compile_info *compile_info); - - #endif /* __VKD3D_SHADER_PRIVATE_H */ -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index c5bd687bd69..42a98763438 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -26,6 +26,7 @@ static HRESULT d3d12_fence_signal(struct 
d3d12_fence *fence, uint64_t value, VkF - static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value); - static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, - struct d3d12_fence *fence, uint64_t value); -+static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue); - static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); - static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any); - -@@ -453,9 +454,9 @@ static const struct d3d12_root_parameter *root_signature_get_root_descriptor( - } - - /* ID3D12Fence */ --static struct d3d12_fence *impl_from_ID3D12Fence(ID3D12Fence *iface) -+static struct d3d12_fence *impl_from_ID3D12Fence1(ID3D12Fence1 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence1_iface); - } - - static VkResult d3d12_fence_create_vk_fence(struct d3d12_fence *fence, VkFence *vk_fence) -@@ -899,18 +900,19 @@ static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uin - vkd3d_mutex_unlock(&fence->mutex); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence1 *iface, - REFIID riid, void **object) - { - TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); - -- if (IsEqualGUID(riid, &IID_ID3D12Fence) -+ if (IsEqualGUID(riid, &IID_ID3D12Fence1) -+ || IsEqualGUID(riid, &IID_ID3D12Fence) - || IsEqualGUID(riid, &IID_ID3D12Pageable) - || IsEqualGUID(riid, &IID_ID3D12DeviceChild) - || IsEqualGUID(riid, &IID_ID3D12Object) - || IsEqualGUID(riid, &IID_IUnknown)) - { -- ID3D12Fence_AddRef(iface); -+ ID3D12Fence1_AddRef(iface); - *object = iface; - return S_OK; - } -@@ -921,9 +923,9 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_fence_QueryInterface(ID3D12Fence *iface, - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence *iface) -+static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence1 *iface) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - ULONG refcount = InterlockedIncrement(&fence->refcount); - - TRACE("%p increasing refcount to %u.\n", fence, refcount); -@@ -936,9 +938,9 @@ static void d3d12_fence_incref(struct d3d12_fence *fence) - InterlockedIncrement(&fence->internal_refcount); - } - --static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence *iface) -+static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence1 *iface) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - ULONG refcount = InterlockedDecrement(&fence->refcount); - - TRACE("%p decreasing refcount to %u.\n", fence, refcount); -@@ -971,10 +973,10 @@ static void d3d12_fence_decref(struct d3d12_fence *fence) - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence1 *iface, - REFGUID guid, UINT *data_size, void *data) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -982,10 +984,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, - return vkd3d_get_private_data(&fence->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence1 *iface, - REFGUID guid, UINT data_size, const void *data) - { -- struct d3d12_fence *fence = 
impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -993,37 +995,37 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, - return vkd3d_set_private_data(&fence->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence1 *iface, - REFGUID guid, const IUnknown *data) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&fence->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence1 *iface, const WCHAR *name) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, fence->device->wchar_size)); - - return name ? 
S_OK : E_INVALIDARG; - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence *iface, REFIID iid, void **device) -+static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence1 *iface, REFIID iid, void **device) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); - - return d3d12_device_query_interface(fence->device, iid, device); - } - --static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface) -+static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence1 *iface) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - uint64_t completed_value; - - TRACE("iface %p.\n", iface); -@@ -1034,10 +1036,10 @@ static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface - return completed_value; - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence1 *iface, - UINT64 value, HANDLE event) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - unsigned int i; - bool latch = false; - -@@ -1105,9 +1107,9 @@ static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fen - return d3d12_device_flush_blocked_queues(fence->device); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value) -+static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence1 *iface, UINT64 value) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, value %#"PRIx64".\n", iface, value); - -@@ -1116,7 +1118,16 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_fence_Signal(ID3D12Fence *iface, UINT64 v - return d3d12_fence_signal(fence, value, VK_NULL_HANDLE, true); - } - --static const struct ID3D12FenceVtbl d3d12_fence_vtbl = -+static D3D12_FENCE_FLAGS STDMETHODCALLTYPE d3d12_fence_GetCreationFlags(ID3D12Fence1 *iface) -+{ -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); -+ -+ TRACE("iface %p.\n", iface); -+ -+ return fence->flags; -+} -+ -+static const struct ID3D12Fence1Vtbl d3d12_fence_vtbl = - { - /* IUnknown methods */ - d3d12_fence_QueryInterface, -@@ -1133,14 +1144,18 @@ static const struct ID3D12FenceVtbl d3d12_fence_vtbl = - d3d12_fence_GetCompletedValue, - d3d12_fence_SetEventOnCompletion, - d3d12_fence_Signal, -+ /* ID3D12Fence1 methods */ -+ d3d12_fence_GetCreationFlags, - }; - - static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) - { -- if (!iface) -+ ID3D12Fence1 *iface1; -+ -+ if (!(iface1 = (ID3D12Fence1 *)iface)) - return NULL; -- assert(iface->lpVtbl == &d3d12_fence_vtbl); -- return impl_from_ID3D12Fence(iface); -+ assert(iface1->lpVtbl == &d3d12_fence_vtbl); -+ return impl_from_ID3D12Fence1(iface1); - } - - static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device, -@@ -1150,7 +1165,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * - VkResult vr; - HRESULT hr; - -- fence->ID3D12Fence_iface.lpVtbl = &d3d12_fence_vtbl; -+ fence->ID3D12Fence1_iface.lpVtbl = &d3d12_fence_vtbl; - fence->internal_refcount = 1; - fence->refcount = 1; - -@@ -1161,7 +1176,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * - - vkd3d_cond_init(&fence->null_event_cond); - -- if (flags) -+ if ((fence->flags = flags)) - FIXME("Ignoring flags %#x.\n", flags); - - fence->events = NULL; -@@ -1315,32 +1330,26 @@ static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_comm - return hr; - } - -- allocator->current_command_list = list; -- -- return S_OK; --} -- --static void 
d3d12_command_allocator_free_command_buffer(struct d3d12_command_allocator *allocator, -- struct d3d12_command_list *list) --{ -- struct d3d12_device *device = allocator->device; -- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -- -- TRACE("allocator %p, list %p.\n", allocator, list); -- -- if (allocator->current_command_list == list) -- allocator->current_command_list = NULL; -- - if (!vkd3d_array_reserve((void **)&allocator->command_buffers, &allocator->command_buffers_size, - allocator->command_buffer_count + 1, sizeof(*allocator->command_buffers))) - { - WARN("Failed to add command buffer.\n"); - VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool, - 1, &list->vk_command_buffer)); -- return; -+ return E_OUTOFMEMORY; - } -- - allocator->command_buffers[allocator->command_buffer_count++] = list->vk_command_buffer; -+ -+ allocator->current_command_list = list; -+ -+ return S_OK; -+} -+ -+static void d3d12_command_allocator_remove_command_list(struct d3d12_command_allocator *allocator, -+ const struct d3d12_command_list *list) -+{ -+ if (allocator->current_command_list == list) -+ allocator->current_command_list = NULL; - } - - static bool d3d12_command_allocator_add_render_pass(struct d3d12_command_allocator *allocator, VkRenderPass pass) -@@ -1910,10 +1919,32 @@ HRESULT d3d12_command_allocator_create(struct d3d12_device *device, - return S_OK; - } - -+static void d3d12_command_signature_incref(struct d3d12_command_signature *signature) -+{ -+ vkd3d_atomic_increment(&signature->internal_refcount); -+} -+ -+static void d3d12_command_signature_decref(struct d3d12_command_signature *signature) -+{ -+ unsigned int refcount = vkd3d_atomic_decrement(&signature->internal_refcount); -+ -+ if (!refcount) -+ { -+ struct d3d12_device *device = signature->device; -+ -+ vkd3d_private_store_destroy(&signature->private_store); -+ -+ vkd3d_free((void *)signature->desc.pArgumentDescs); -+ vkd3d_free(signature); -+ -+ 
d3d12_device_release(device); -+ } -+} -+ - /* ID3D12CommandList */ --static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList2(ID3D12GraphicsCommandList2 *iface) -+static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList3(ID3D12GraphicsCommandList3 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); - } - - static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list) -@@ -2259,12 +2290,13 @@ static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *l - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList3 *iface, - REFIID iid, void **object) - { - TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); - -- if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) -+ if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3) -+ || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) - || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList1) - || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList) - || IsEqualGUID(iid, &IID_ID3D12CommandList) -@@ -2272,7 +2304,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic - || IsEqualGUID(iid, &IID_ID3D12Object) - || IsEqualGUID(iid, &IID_IUnknown)) - { -- ID3D12GraphicsCommandList2_AddRef(iface); -+ ID3D12GraphicsCommandList3_AddRef(iface); - *object = iface; - return S_OK; - } -@@ -2283,9 +2315,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList2 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList3 *iface) - 
{ -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - ULONG refcount = InterlockedIncrement(&list->refcount); - - TRACE("%p increasing refcount to %u.\n", list, refcount); -@@ -2298,9 +2330,9 @@ static void vkd3d_pipeline_bindings_cleanup(struct vkd3d_pipeline_bindings *bind - vkd3d_free(bindings->vk_uav_counter_views); - } - --static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList2 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList3 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - ULONG refcount = InterlockedDecrement(&list->refcount); - - TRACE("%p decreasing refcount to %u.\n", list, refcount); -@@ -2313,7 +2345,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL - - /* When command pool is destroyed, all command buffers are implicitly freed. 
*/ - if (list->allocator) -- d3d12_command_allocator_free_command_buffer(list->allocator, list); -+ d3d12_command_allocator_remove_command_list(list->allocator, list); - - vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COMPUTE]); - vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS]); -@@ -2326,66 +2358,66 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL - return refcount; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList3 *iface, - REFGUID guid, UINT *data_size, void *data) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); - - return vkd3d_get_private_data(&list->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList3 *iface, - REFGUID guid, UINT data_size, const void *data) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); - - return vkd3d_set_private_data(&list->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList3 *iface, - REFGUID guid, const IUnknown *data) - { -- struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&list->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList2 *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList3 *iface, const WCHAR *name) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, list->device->wchar_size)); - - return name ? S_OK : E_INVALIDARG; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList2 *iface, REFIID iid, void **device) -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList3 *iface, REFIID iid, void **device) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); - - return d3d12_device_query_interface(list->device, iid, device); - } - --static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList2 *iface) -+static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList3 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p.\n", iface); - - return list->type; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList2 *iface) -+static HRESULT 
STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList3 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - VkResult vr; - -@@ -2411,7 +2443,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL - - if (list->allocator) - { -- d3d12_command_allocator_free_command_buffer(list->allocator, list); -+ d3d12_command_allocator_remove_command_list(list->allocator, list); - list->allocator = NULL; - } - -@@ -2429,7 +2461,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL - static void d3d12_command_list_reset_state(struct d3d12_command_list *list, - ID3D12PipelineState *initial_pipeline_state) - { -- ID3D12GraphicsCommandList2 *iface = &list->ID3D12GraphicsCommandList2_iface; -+ ID3D12GraphicsCommandList3 *iface = &list->ID3D12GraphicsCommandList3_iface; - - memset(list->strides, 0, sizeof(list->strides)); - list->primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; -@@ -2465,14 +2497,14 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, - - list->descriptor_heap_count = 0; - -- ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); -+ ID3D12GraphicsCommandList3_SetPipelineState(iface, initial_pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList3 *iface, - ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) - { - struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - HRESULT hr; - - 
TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", -@@ -2499,7 +2531,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandL - return hr; - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList3 *iface, - ID3D12PipelineState *pipeline_state) - { - FIXME("iface %p, pipline_state %p stub!\n", iface, pipeline_state); -@@ -3185,6 +3217,23 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) - } - } - -+static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap) -+{ -+ if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) -+ { -+ if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) -+ { -+ /* Descriptors can be written after binding. */ -+ FIXME("Flushing descriptor updates while list %p is not closed.\n", list); -+ vkd3d_mutex_lock(&heap->vk_sets_mutex); -+ d3d12_desc_flush_vk_heap_updates_locked(heap, list->device); -+ vkd3d_mutex_unlock(&heap->vk_sets_mutex); -+ return; -+ } -+ list->descriptor_heaps[list->descriptor_heap_count++] = heap; -+ } -+} -+ - static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, - enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) - { -@@ -3209,18 +3258,6 @@ static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *l - bindings->sampler_heap_id = heap->serial_id; - } - -- if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) -- { -- if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) -- { -- /* Descriptors can be written after binding. 
*/ -- FIXME("Flushing descriptor updates while list %p is not closed.\n", list); -- command_list_flush_vk_heap_updates(list); -- list->descriptor_heap_count = 0; -- } -- list->descriptor_heaps[list->descriptor_heap_count++] = heap; -- } -- - vkd3d_mutex_lock(&heap->vk_sets_mutex); - - for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) -@@ -3353,11 +3390,11 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList3 *iface, - UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location, - UINT start_instance_location) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " -@@ -3377,11 +3414,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCom - instance_count, start_vertex_location, start_instance_location)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList3 *iface, - UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, - INT base_vertex_location, UINT start_instance_location) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, " -@@ -3403,10 +3440,10 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_DrawIndexedInstanced(ID3D12Grap - instance_count, start_vertex_location, base_vertex_location, start_instance_location)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList3 *iface, - UINT x, UINT y, UINT z) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); -@@ -3422,10 +3459,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandL - VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_vk_device_procs *vk_procs; - VkBufferCopy buffer_copy; -@@ -3624,7 +3661,7 @@ static HRESULT d3d12_command_list_allocate_transfer_buffer(struct d3d12_command_ - static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_command_list *list, - struct d3d12_resource *dst_resource, unsigned int dst_sub_resource_idx, - const struct vkd3d_format *dst_format, struct d3d12_resource *src_resource, -- unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format) -+ unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format, unsigned int layer_count) - { - const struct vkd3d_vk_device_procs 
*vk_procs = &list->device->vk_procs; - const D3D12_RESOURCE_DESC *dst_desc = &dst_resource->desc; -@@ -3651,6 +3688,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - buffer_image_copy.bufferImageHeight = 0; - vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, - src_format, src_sub_resource_idx, src_desc->MipLevels); -+ buffer_image_copy.imageSubresource.layerCount = layer_count; - src_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; - buffer_image_copy.imageOffset.x = 0; - buffer_image_copy.imageOffset.y = 0; -@@ -3658,7 +3696,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - vk_extent_3d_from_d3d12_miplevel(&buffer_image_copy.imageExtent, src_desc, src_miplevel_idx); - - buffer_size = src_format->byte_count * buffer_image_copy.imageExtent.width * -- buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth; -+ buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth * layer_count; - if (FAILED(hr = d3d12_command_list_allocate_transfer_buffer(list, buffer_size, &transfer_buffer))) - { - ERR("Failed to allocate transfer buffer, hr %#x.\n", hr); -@@ -3684,6 +3722,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - - vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, - dst_format, dst_sub_resource_idx, dst_desc->MipLevels); -+ buffer_image_copy.imageSubresource.layerCount = layer_count; - dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; - - assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == -@@ -3705,11 +3744,11 @@ static bool validate_d3d12_box(const D3D12_BOX *box) - && box->back > box->front; - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList3 *iface, - const 
D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, - const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_format *src_format, *dst_format; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3813,7 +3852,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - { - d3d12_command_list_copy_incompatible_texture_region(list, - dst_resource, dst->u.SubresourceIndex, dst_format, -- src_resource, src->u.SubresourceIndex, src_format); -+ src_resource, src->u.SubresourceIndex, src_format, 1); - return; - } - -@@ -3830,11 +3869,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst, ID3D12Resource *src) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *dst_resource, *src_resource; -+ const struct vkd3d_format *dst_format, *src_format; - const struct vkd3d_vk_device_procs *vk_procs; - VkBufferCopy vk_buffer_copy; - VkImageCopy vk_image_copy; -@@ -3867,16 +3907,29 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - else - { - layer_count = d3d12_resource_desc_get_layer_count(&dst_resource->desc); -+ dst_format = dst_resource->format; -+ src_format = src_resource->format; - - assert(d3d12_resource_is_texture(dst_resource)); - assert(d3d12_resource_is_texture(src_resource)); - assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); - 
assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); - -+ if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) -+ { -+ for (i = 0; i < dst_resource->desc.MipLevels; ++i) -+ { -+ d3d12_command_list_copy_incompatible_texture_region(list, -+ dst_resource, i, dst_format, -+ src_resource, i, src_format, layer_count); -+ } -+ return; -+ } -+ - for (i = 0; i < dst_resource->desc.MipLevels; ++i) - { - vk_image_copy_from_d3d12(&vk_image_copy, i, i, &src_resource->desc, &dst_resource->desc, -- src_resource->format, dst_resource->format, NULL, 0, 0, 0); -+ src_format, dst_format, NULL, 0, 0, 0); - vk_image_copy.dstSubresource.layerCount = layer_count; - vk_image_copy.srcSubresource.layerCount = layer_count; - VK_CALL(vkCmdCopyImage(list->vk_command_buffer, src_resource->u.vk_image, -@@ -3886,7 +3939,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, - const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, - D3D12_TILE_COPY_FLAGS flags) -@@ -3897,11 +3950,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommand - buffer, buffer_offset, flags); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst, UINT dst_sub_resource_idx, - ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - 
const struct vkd3d_format *src_format, *dst_format, *vk_format; - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3964,10 +4017,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList3 *iface, - D3D12_PRIMITIVE_TOPOLOGY topology) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, topology %#x.\n", iface, topology); - -@@ -3978,11 +4031,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12Gr - d3d12_command_list_invalidate_current_pipeline(list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList3 *iface, - UINT viewport_count, const D3D12_VIEWPORT *viewports) - { - VkViewport vk_viewports[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; - -@@ -4016,10 +4069,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo - VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, viewport_count, vk_viewports)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList3 *iface, - UINT rect_count, 
const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - VkRect2D vk_rects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; -@@ -4044,10 +4097,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12Graphic - VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, rect_count, vk_rects)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList3 *iface, - const FLOAT blend_factor[4]) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); -@@ -4056,10 +4109,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12Graphics - VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, blend_factor)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList3 *iface, - UINT stencil_ref) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); -@@ -4068,11 +4121,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsC - VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, VK_STENCIL_FRONT_AND_BACK, stencil_ref)); - } - --static void STDMETHODCALLTYPE 
d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList3 *iface, - ID3D12PipelineState *pipeline_state) - { - struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); - -@@ -4123,10 +4176,10 @@ static unsigned int d3d12_find_ds_multiplanar_transition(const D3D12_RESOURCE_BA - return 0; - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList3 *iface, - UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - bool have_aliasing_barriers = false, have_split_barriers = false; - const struct vkd3d_vk_device_procs *vk_procs; - const struct vkd3d_vulkan_info *vk_info; -@@ -4349,13 +4402,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC - WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList3 *iface, - ID3D12GraphicsCommandList *command_list) - { - FIXME("iface %p, command_list %p stub!\n", iface, command_list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList3 *iface, - UINT 
heap_count, ID3D12DescriptorHeap *const *heaps) - { - TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); -@@ -4381,10 +4434,10 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis - d3d12_command_list_invalidate_root_parameters(list, bind_point); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList3 *iface, - ID3D12RootSignature *root_signature) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_signature %p.\n", iface, root_signature); - -@@ -4392,10 +4445,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12G - unsafe_impl_from_ID3D12RootSignature(root_signature)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList3 *iface, - ID3D12RootSignature *root_signature) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_signature %p.\n", iface, root_signature); - -@@ -4408,6 +4461,7 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l - { - struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; - const struct d3d12_root_signature *root_signature = bindings->root_signature; -+ struct d3d12_descriptor_heap *descriptor_heap; - struct d3d12_desc *desc; - - assert(root_signature_get_descriptor_table(root_signature, index)); -@@ -4418,15 +4472,25 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l - 
if (bindings->descriptor_tables[index] == desc) - return; - -+ descriptor_heap = d3d12_desc_get_descriptor_heap(desc); -+ if (!(descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) -+ { -+ /* GetGPUDescriptorHandleForHeapStart() returns a null handle in this case, -+ * but a CPU handle could be passed. */ -+ WARN("Descriptor heap %p is not shader visible.\n", descriptor_heap); -+ return; -+ } -+ command_list_add_descriptor_heap(list, descriptor_heap); -+ - bindings->descriptor_tables[index] = desc; - bindings->descriptor_table_dirty_mask |= (uint64_t)1 << index; - bindings->descriptor_table_active_mask |= (uint64_t)1 << index; - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", - iface, root_parameter_index, base_descriptor.ptr); -@@ -4435,10 +4499,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(I - root_parameter_index, base_descriptor); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", - iface, 
root_parameter_index, base_descriptor.ptr); -@@ -4460,10 +4524,10 @@ static void d3d12_command_list_set_root_constants(struct d3d12_command_list *lis - c->stage_flags, c->offset + offset * sizeof(uint32_t), count * sizeof(uint32_t), data)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, UINT data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", - iface, root_parameter_index, data, dst_offset); -@@ -4472,10 +4536,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3 - root_parameter_index, dst_offset, 1, &data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, UINT data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", - iface, root_parameter_index, data, dst_offset); -@@ -4484,10 +4548,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID - root_parameter_index, dst_offset, 1, &data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, UINT 
constant_count, const void *data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", - iface, root_parameter_index, constant_count, data, dst_offset); -@@ -4496,10 +4560,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID - root_parameter_index, dst_offset, constant_count, data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", - iface, root_parameter_index, constant_count, data, dst_offset); -@@ -4561,9 +4625,9 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4572,9 +4636,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferVie - } - - static void 
STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4633,9 +4697,9 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4645,9 +4709,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceVie - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4657,9 +4721,9 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_SetGraphicsRootShaderResourceVi - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4669,9 +4733,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessVi - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4680,10 +4744,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessV - root_parameter_index, address); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList3 *iface, - const D3D12_INDEX_BUFFER_VIEW *view) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - struct d3d12_resource *resource; - enum VkIndexType index_type; -@@ -4723,10 +4787,10 @@ 
static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics - view->BufferLocation - resource->gpu_address, index_type)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList3 *iface, - UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_null_resources *null_resources; - struct vkd3d_gpu_va_allocator *gpu_va_allocator; - VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; -@@ -4781,10 +4845,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi - d3d12_command_list_invalidate_current_pipeline(list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList3 *iface, - UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; - VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; - VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; -@@ -4846,11 +4910,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsComm - VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList3 *iface, - UINT 
render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, - BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct d3d12_rtv_desc *rtv_desc; - const struct d3d12_dsv_desc *dsv_desc; - VkFormat prev_dsv_format; -@@ -5051,12 +5115,12 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList3 *iface, - D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, - UINT rect_count, const D3D12_RECT *rects) - { - const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv); - struct VkAttachmentDescription attachment_desc; - struct VkAttachmentReference ds_reference; -@@ -5100,10 +5164,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12Gra - &clear_value, rect_count, rects); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList3 *iface, - D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct d3d12_rtv_desc 
*rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv); - struct VkAttachmentDescription attachment_desc; - struct VkAttachmentReference color_reference; -@@ -5348,11 +5412,11 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList3 *iface, - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, - const UINT values[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct vkd3d_view *descriptor, *uint_view = NULL; - struct d3d12_device *device = list->device; - struct vkd3d_texture_view_desc view_desc; -@@ -5414,11 +5478,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID - vkd3d_view_decref(uint_view, device); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList3 *iface, - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, - const float values[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *resource_impl; - VkClearColorValue colour; - struct vkd3d_view *view; -@@ -5434,16 +5498,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I - d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); - } - 
--static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *resource, const D3D12_DISCARD_REGION *region) - { - FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region); - } - --static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList3 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - const struct vkd3d_vk_device_procs *vk_procs; - VkQueryControlFlags flags = 0; -@@ -5470,10 +5534,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsComman - VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList3 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - const struct vkd3d_vk_device_procs *vk_procs; - -@@ -5515,12 +5579,12 @@ static size_t get_query_stride(D3D12_QUERY_TYPE type) - return sizeof(uint64_t); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE 
d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList3 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, - ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) - { - const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer); - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i, first, count; -@@ -5596,10 +5660,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12Graphics - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer); - const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -5668,19 +5732,19 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCo - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList3 *iface, - UINT metadata, const void *data, UINT size) - { - FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); - } - --static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE 
d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList3 *iface, - UINT metadata, const void *data, UINT size) - { - FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); - } - --static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList2 *iface) -+static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList3 *iface) - { - FIXME("iface %p stub!\n", iface); - } -@@ -5689,14 +5753,14 @@ STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMEN - STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); - STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS)); - --static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList3 *iface, - ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, - UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) - { - struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature); - struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer); - struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const D3D12_COMMAND_SIGNATURE_DESC *signature_desc; - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; -@@ -5714,6 +5778,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC - return; - } - -+ d3d12_command_signature_incref(sig_impl); -+ - signature_desc = &sig_impl->desc; - for (i = 0; i < signature_desc->NumArgumentDescs; ++i) - { -@@ -5776,6 +5842,7 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC - if (!d3d12_command_list_update_compute_state(list)) - { - WARN("Failed to update compute state, ignoring dispatch.\n"); -+ d3d12_command_signature_decref(sig_impl); - return; - } - -@@ -5788,9 +5855,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC - break; - } - } -+ -+ d3d12_command_signature_decref(sig_impl); - } - --static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst_buffer, UINT64 dst_offset, - ID3D12Resource *src_buffer, UINT64 src_offset, - UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, -@@ -5803,7 +5872,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12Grap - dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); - } - --static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst_buffer, UINT64 dst_offset, - ID3D12Resource *src_buffer, UINT64 src_offset, - UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, -@@ -5816,20 +5885,20 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr - dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList3 *iface, - FLOAT min, FLOAT max) - { - FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList2 *iface, -+static 
void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList3 *iface, - UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) - { - FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", - iface, sample_count, pixel_count, sample_positions); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, - ID3D12Resource *src_resource, UINT src_sub_resource_idx, - D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) -@@ -5841,16 +5910,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12 - src_resource, src_sub_resource_idx, src_rect, format, mode); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList2 *iface, UINT mask) -+static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList3 *iface, UINT mask) - { - FIXME("iface %p, mask %#x stub!\n", iface, mask); - } - --static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList3 *iface, - UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters, - const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *resource; - unsigned int i; - -@@ -5863,7 +5932,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12Grap - } - } - --static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = -+static void 
STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList3 *iface, -+ ID3D12ProtectedResourceSession *protected_session) -+{ -+ FIXME("iface %p, protected_session %p stub!\n", iface, protected_session); -+} -+ -+static const struct ID3D12GraphicsCommandList3Vtbl d3d12_command_list_vtbl = - { - /* IUnknown methods */ - d3d12_command_list_QueryInterface, -@@ -5939,6 +6014,8 @@ static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = - d3d12_command_list_SetViewInstanceMask, - /* ID3D12GraphicsCommandList2 methods */ - d3d12_command_list_WriteBufferImmediate, -+ /* ID3D12GraphicsCommandList3 methods */ -+ d3d12_command_list_SetProtectedResourceSession, - }; - - static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface) -@@ -5946,7 +6023,7 @@ static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12Comma - if (!iface) - return NULL; - assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); -- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); - } - - static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device, -@@ -5955,7 +6032,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d - { - HRESULT hr; - -- list->ID3D12GraphicsCommandList2_iface.lpVtbl = &d3d12_command_list_vtbl; -+ list->ID3D12GraphicsCommandList3_iface.lpVtbl = &d3d12_command_list_vtbl; - list->refcount = 1; - - list->type = type; -@@ -6063,8 +6140,35 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if - return refcount; - } - -+static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) -+{ -+ switch (op->opcode) -+ { -+ case VKD3D_CS_OP_WAIT: -+ d3d12_fence_decref(op->u.wait.fence); -+ break; -+ -+ case 
VKD3D_CS_OP_SIGNAL: -+ d3d12_fence_decref(op->u.signal.fence); -+ break; -+ -+ case VKD3D_CS_OP_EXECUTE: -+ vkd3d_free(op->u.execute.buffers); -+ break; -+ -+ case VKD3D_CS_OP_UPDATE_MAPPINGS: -+ case VKD3D_CS_OP_COPY_MAPPINGS: -+ break; -+ } -+} -+ - static void d3d12_command_queue_op_array_destroy(struct d3d12_command_queue_op_array *array) - { -+ unsigned int i; -+ -+ for (i = 0; i < array->count; ++i) -+ d3d12_command_queue_destroy_op(&array->ops[i]); -+ - vkd3d_free(array->ops); - } - -@@ -6162,17 +6266,131 @@ static struct vkd3d_cs_op_data *d3d12_command_queue_op_array_require_space(struc - return &array->ops[array->count++]; - } - -+static bool clone_array_parameter(void **dst, const void *src, size_t elem_size, unsigned int count) -+{ -+ void *buffer; -+ -+ *dst = NULL; -+ if (src) -+ { -+ if (!(buffer = vkd3d_calloc(count, elem_size))) -+ return false; -+ memcpy(buffer, src, count * elem_size); -+ *dst = buffer; -+ } -+ return true; -+} -+ -+static void update_mappings_cleanup(struct vkd3d_cs_update_mappings *update_mappings) -+{ -+ vkd3d_free(update_mappings->region_start_coordinates); -+ vkd3d_free(update_mappings->region_sizes); -+ vkd3d_free(update_mappings->range_flags); -+ vkd3d_free(update_mappings->heap_range_offsets); -+ vkd3d_free(update_mappings->range_tile_counts); -+} -+ - static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, - ID3D12Resource *resource, UINT region_count, - const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, const D3D12_TILE_REGION_SIZE *region_sizes, - ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, - const UINT *heap_range_offsets, const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) - { -- FIXME("iface %p, resource %p, region_count %u, region_start_coordinates %p, " -+ struct d3d12_resource *resource_impl = unsafe_impl_from_ID3D12Resource(resource); -+ struct d3d12_command_queue *command_queue = 
impl_from_ID3D12CommandQueue(iface); -+ struct d3d12_heap *heap_impl = unsafe_impl_from_ID3D12Heap(heap); -+ struct vkd3d_cs_update_mappings update_mappings = {0}; -+ struct vkd3d_cs_op_data *op; -+ -+ TRACE("iface %p, resource %p, region_count %u, region_start_coordinates %p, " - "region_sizes %p, heap %p, range_count %u, range_flags %p, heap_range_offsets %p, " -- "range_tile_counts %p, flags %#x stub!\n", -+ "range_tile_counts %p, flags %#x.\n", - iface, resource, region_count, region_start_coordinates, region_sizes, heap, range_count, - range_flags, heap_range_offsets, range_tile_counts, flags); -+ -+ if (!region_count || !range_count) -+ return; -+ -+ if (!command_queue->supports_sparse_binding) -+ { -+ FIXME("Command queue %p does not support sparse binding.\n", command_queue); -+ return; -+ } -+ -+ if (!resource_impl->tiles.subresource_count) -+ { -+ WARN("Resource %p is not a tiled resource.\n", resource_impl); -+ return; -+ } -+ -+ if (region_count > 1 && !region_start_coordinates) -+ { -+ WARN("Region start coordinates must not be NULL when region count is > 1.\n"); -+ return; -+ } -+ -+ if (range_count > 1 && !range_tile_counts) -+ { -+ WARN("Range tile counts must not be NULL when range count is > 1.\n"); -+ return; -+ } -+ -+ update_mappings.resource = resource_impl; -+ update_mappings.heap = heap_impl; -+ if (!clone_array_parameter((void **)&update_mappings.region_start_coordinates, -+ region_start_coordinates, sizeof(*region_start_coordinates), region_count)) -+ { -+ ERR("Failed to allocate region start coordinates.\n"); -+ return; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.region_sizes, -+ region_sizes, sizeof(*region_sizes), region_count)) -+ { -+ ERR("Failed to allocate region sizes.\n"); -+ goto free_clones; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.range_flags, -+ range_flags, sizeof(*range_flags), range_count)) -+ { -+ ERR("Failed to allocate range flags.\n"); -+ goto free_clones; -+ } -+ if 
(!clone_array_parameter((void **)&update_mappings.heap_range_offsets, -+ heap_range_offsets, sizeof(*heap_range_offsets), range_count)) -+ { -+ ERR("Failed to allocate heap range offsets.\n"); -+ goto free_clones; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.range_tile_counts, -+ range_tile_counts, sizeof(*range_tile_counts), range_count)) -+ { -+ ERR("Failed to allocate range tile counts.\n"); -+ goto free_clones; -+ } -+ update_mappings.region_count = region_count; -+ update_mappings.range_count = range_count; -+ update_mappings.flags = flags; -+ -+ vkd3d_mutex_lock(&command_queue->op_mutex); -+ -+ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) -+ { -+ ERR("Failed to add op.\n"); -+ goto unlock_mutex; -+ } -+ -+ op->opcode = VKD3D_CS_OP_UPDATE_MAPPINGS; -+ op->u.update_mappings = update_mappings; -+ -+ d3d12_command_queue_submit_locked(command_queue); -+ -+ vkd3d_mutex_unlock(&command_queue->op_mutex); -+ return; -+ -+unlock_mutex: -+ vkd3d_mutex_unlock(&command_queue->op_mutex); -+free_clones: -+ update_mappings_cleanup(&update_mappings); - } - - static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, -@@ -6183,10 +6401,34 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command - const D3D12_TILE_REGION_SIZE *region_size, - D3D12_TILE_MAPPING_FLAGS flags) - { -- FIXME("iface %p, dst_resource %p, dst_region_start_coordinate %p, " -- "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x stub!\n", -+ struct d3d12_resource *dst_resource_impl = impl_from_ID3D12Resource(dst_resource); -+ struct d3d12_resource *src_resource_impl = impl_from_ID3D12Resource(src_resource); -+ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); -+ struct vkd3d_cs_op_data *op; -+ -+ TRACE("iface %p, dst_resource %p, dst_region_start_coordinate %p, " -+ "src_resource %p, src_region_start_coordinate %p, region_size %p, flags 
%#x.\n", - iface, dst_resource, dst_region_start_coordinate, src_resource, - src_region_start_coordinate, region_size, flags); -+ -+ vkd3d_mutex_lock(&command_queue->op_mutex); -+ -+ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) -+ { -+ ERR("Failed to add op.\n"); -+ return; -+ } -+ op->opcode = VKD3D_CS_OP_COPY_MAPPINGS; -+ op->u.copy_mappings.dst_resource = dst_resource_impl; -+ op->u.copy_mappings.src_resource = src_resource_impl; -+ op->u.copy_mappings.dst_region_start_coordinate = *dst_region_start_coordinate; -+ op->u.copy_mappings.src_region_start_coordinate = *src_region_start_coordinate; -+ op->u.copy_mappings.region_size = *region_size; -+ op->u.copy_mappings.flags = flags; -+ -+ d3d12_command_queue_submit_locked(command_queue); -+ -+ vkd3d_mutex_unlock(&command_queue->op_mutex); - } - - static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, -@@ -6214,8 +6456,6 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu - ERR("Failed to submit queue(s), vr %d.\n", vr); - - vkd3d_queue_release(vkd3d_queue); -- -- vkd3d_free(buffers); - } - - static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue) -@@ -6273,7 +6513,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm - if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) - { - ERR("Failed to add op.\n"); -- return; -+ goto done; - } - op->opcode = VKD3D_CS_OP_EXECUTE; - op->u.execute.buffers = buffers; -@@ -6281,6 +6521,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm - - d3d12_command_queue_submit_locked(command_queue); - -+done: - vkd3d_mutex_unlock(&command_queue->op_mutex); - return; - } -@@ -6348,6 +6589,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * - - if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) - { -+ ERR("Failed to 
add op.\n"); - hr = E_OUTOFMEMORY; - goto done; - } -@@ -6686,6 +6928,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if - - if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) - { -+ ERR("Failed to add op.\n"); - hr = E_OUTOFMEMORY; - goto done; - } -@@ -6922,22 +7165,31 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * - return d3d12_command_queue_fixup_after_flush_locked(queue); - } - d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); -- d3d12_fence_decref(fence); - break; - - case VKD3D_CS_OP_SIGNAL: - d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); -- d3d12_fence_decref(op->u.signal.fence); - break; - - case VKD3D_CS_OP_EXECUTE: - d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); - break; - -+ case VKD3D_CS_OP_UPDATE_MAPPINGS: -+ FIXME("Tiled resource binding is not supported yet.\n"); -+ update_mappings_cleanup(&op->u.update_mappings); -+ break; -+ -+ case VKD3D_CS_OP_COPY_MAPPINGS: -+ FIXME("Tiled resource mapping copying is not supported yet.\n"); -+ break; -+ - default: - vkd3d_unreachable(); - } - -+ d3d12_command_queue_destroy_op(op); -+ - *flushed_any |= true; - } - -@@ -7000,6 +7252,8 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, - if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue->vkd3d_queue, device))) - goto fail_destroy_op_mutex; - -+ queue->supports_sparse_binding = !!(queue->vkd3d_queue->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT); -+ - d3d12_device_add_ref(queue->device = device); - - return S_OK; -@@ -7105,16 +7359,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_signature_Release(ID3D12CommandSign - TRACE("%p decreasing refcount to %u.\n", signature, refcount); - - if (!refcount) -- { -- struct d3d12_device *device = signature->device; -- -- vkd3d_private_store_destroy(&signature->private_store); -- -- 
vkd3d_free((void *)signature->desc.pArgumentDescs); -- vkd3d_free(signature); -- -- d3d12_device_release(device); -- } -+ d3d12_command_signature_decref(signature); - - return refcount; - } -@@ -7221,6 +7466,7 @@ HRESULT d3d12_command_signature_create(struct d3d12_device *device, const D3D12_ - - object->ID3D12CommandSignature_iface.lpVtbl = &d3d12_command_signature_vtbl; - object->refcount = 1; -+ object->internal_refcount = 1; - - object->desc = *desc; - if (!(object->desc.pArgumentDescs = vkd3d_calloc(desc->NumArgumentDescs, sizeof(*desc->pArgumentDescs)))) -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 4263dcf4184..c33061073a3 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -1464,6 +1464,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - - vulkan_info->device_limits = physical_device_info->properties2.properties.limits; - vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; -+ vulkan_info->sparse_binding = features->sparseBinding; -+ vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; - vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; - vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; - vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat; -@@ -2433,34 +2435,39 @@ static void device_init_descriptor_pool_sizes(struct d3d12_device *device) - - static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) - { -- cache->head = NULL; -+ memset(cache, 0, sizeof(*cache)); - cache->size = size; - } - - static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache) - { - union d3d12_desc_object u; -+ unsigned int i; - void *next; - -- for (u.object = cache->head; u.object; u.object = next) -+ for (i = 0; i < 
ARRAY_SIZE(cache->heads); ++i) - { -- next = u.header->next; -- vkd3d_free(u.object); -+ for (u.object = cache->heads[i].head; u.object; u.object = next) -+ { -+ next = u.header->next; -+ vkd3d_free(u.object); -+ } - } - } - - /* ID3D12Device */ --static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) -+static inline struct d3d12_device *impl_from_ID3D12Device1(ID3D12Device1 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device1_iface); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device1 *iface, - REFIID riid, void **object) - { - TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); - -- if (IsEqualGUID(riid, &IID_ID3D12Device) -+ if (IsEqualGUID(riid, &IID_ID3D12Device1) -+ || IsEqualGUID(riid, &IID_ID3D12Device) - || IsEqualGUID(riid, &IID_ID3D12Object) - || IsEqualGUID(riid, &IID_IUnknown)) - { -@@ -2475,9 +2482,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface) -+static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device1 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - ULONG refcount = InterlockedIncrement(&device->refcount); - - TRACE("%p increasing refcount to %u.\n", device, refcount); -@@ -2485,9 +2492,9 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface) - return refcount; - } - --static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) -+static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device1 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = 
impl_from_ID3D12Device1(iface); - ULONG refcount = InterlockedDecrement(&device->refcount); - - TRACE("%p decreasing refcount to %u.\n", device, refcount); -@@ -2521,10 +2528,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) - return refcount; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device1 *iface, - REFGUID guid, UINT *data_size, void *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -2532,10 +2539,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface - return vkd3d_get_private_data(&device->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device1 *iface, - REFGUID guid, UINT data_size, const void *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -2543,19 +2550,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device *iface - return vkd3d_set_private_data(&device->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device1 *iface, - REFGUID guid, const IUnknown *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, 
debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&device->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device1 *iface, const WCHAR *name) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size)); - -@@ -2563,17 +2570,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const - VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); - } - --static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device *iface) -+static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device1 *iface) - { - TRACE("iface %p.\n", iface); - - return 1; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device1 *iface, - const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_command_queue *object; - HRESULT hr; - -@@ -2587,10 +2594,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device *i - riid, command_queue); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device1 *iface, - D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_command_allocator *object; - HRESULT hr; - -@@ -2604,10 +2611,10 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateCommandAllocator(ID3D12Devic - riid, command_allocator); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device1 *iface, - const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_pipeline_state *object; - HRESULT hr; - -@@ -2621,10 +2628,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 - &IID_ID3D12PipelineState, riid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device1 *iface, - const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_pipeline_state *object; - HRESULT hr; - -@@ -2638,11 +2645,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D - &IID_ID3D12PipelineState, riid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device1 *iface, - UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator, - ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_command_list *object; - HRESULT hr; - -@@ -2655,8 +2662,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *if - 
initial_pipeline_state, &object))) - return hr; - -- return return_interface(&object->ID3D12GraphicsCommandList2_iface, -- &IID_ID3D12GraphicsCommandList2, riid, command_list); -+ return return_interface(&object->ID3D12GraphicsCommandList3_iface, -+ &IID_ID3D12GraphicsCommandList3, riid, command_list); - } - - /* Direct3D feature levels restrict which formats can be optionally supported. */ -@@ -2765,10 +2772,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) - return true; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device1 *iface, - D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", - iface, feature, feature_data, feature_data_size); -@@ -3267,10 +3274,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device * - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device1 *iface, - const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_descriptor_heap *object; - HRESULT hr; - -@@ -3284,7 +3291,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device - &IID_ID3D12DescriptorHeap, riid, descriptor_heap); - } - --static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device *iface, -+static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device1 *iface, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) - 
{ - TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); -@@ -3307,11 +3314,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device1 *iface, - UINT node_mask, const void *bytecode, SIZE_T bytecode_length, - REFIID riid, void **root_signature) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_root_signature *object; - HRESULT hr; - -@@ -3327,10 +3334,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device * - &IID_ID3D12RootSignature, riid, root_signature); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device1 *iface, - const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); -@@ -3339,11 +3346,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device1 *iface, - ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_desc tmp = {0}; 
- - TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", -@@ -3353,11 +3360,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device1 *iface, - ID3D12Resource *resource, ID3D12Resource *counter_resource, - const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %#lx.\n", -@@ -3368,7 +3375,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device1 *iface, - ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -@@ -3376,10 +3383,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device * - iface, resource, desc, descriptor.ptr); - - d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), -- impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); -+ impl_from_ID3D12Device1(iface), unsafe_impl_from_ID3D12Resource(resource), desc); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device1 *iface, - ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, - 
D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -@@ -3387,13 +3394,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device * - iface, resource, desc, descriptor.ptr); - - d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), -- impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); -+ impl_from_ID3D12Device1(iface), unsafe_impl_from_ID3D12Resource(resource), desc); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device1 *iface, - const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); -@@ -3402,16 +3409,17 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device1 *iface, - UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, - const UINT *dst_descriptor_range_sizes, - UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, - const UINT *src_descriptor_range_sizes, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; - unsigned int dst_range_size, src_range_size; -+ struct d3d12_descriptor_heap *dst_heap; - const struct d3d12_desc *src; - struct d3d12_desc *dst; - -@@ -3441,13 +3449,14 @@ static void 
STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, - src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1; - - dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); -+ dst_heap = d3d12_desc_get_descriptor_heap(dst); - src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); - - for (; dst_idx < dst_range_size && src_idx < src_range_size; ++dst_idx, ++src_idx) - { - if (dst[dst_idx].s.u.object == src[src_idx].s.u.object) - continue; -- d3d12_desc_copy(&dst[dst_idx], &src[src_idx], device); -+ d3d12_desc_copy(&dst[dst_idx], &src[src_idx], dst_heap, device); - } - - if (dst_idx >= dst_range_size) -@@ -3463,7 +3472,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, - } - } - --static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device1 *iface, - UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, - const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) -@@ -3478,10 +3487,10 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i - } - - static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( -- ID3D12Device *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, -+ ID3D12Device1 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, - UINT count, const D3D12_RESOURCE_DESC *resource_descs) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - const D3D12_RESOURCE_DESC *desc; - uint64_t requested_alignment; - -@@ -3554,10 +3563,10 @@ invalid: - return info; - } - --static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device *iface, 
-+static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device1 *iface, - D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - bool coherent; - - TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", -@@ -3597,12 +3606,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope - return heap_properties; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device1 *iface, - const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_resource *object; - HRESULT hr; - -@@ -3621,10 +3630,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi - return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device1 *iface, - const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_heap *object; - HRESULT hr; - -@@ -3640,12 +3649,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, - return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreatePlacedResource(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device1 *iface, - ID3D12Heap *heap, UINT64 heap_offset, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_heap *heap_object; - struct d3d12_resource *object; - HRESULT hr; -@@ -3664,11 +3673,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device - return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device1 *iface, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_resource *object; - HRESULT hr; - -@@ -3682,11 +3691,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic - return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device1 *iface, - ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, - const WCHAR *name, HANDLE *handle) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", - 
iface, object, attributes, access, debugstr_w(name, device->wchar_size), handle); -@@ -3694,7 +3703,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *i - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device1 *iface, - HANDLE handle, REFIID riid, void **object) - { - FIXME("iface %p, handle %p, riid %s, object %p stub!\n", -@@ -3703,10 +3712,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device *ifa - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device1 *iface, - const WCHAR *name, DWORD access, HANDLE *handle) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - FIXME("iface %p, name %s, access %#x, handle %p stub!\n", - iface, debugstr_w(name, device->wchar_size), access, handle); -@@ -3714,7 +3723,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device1 *iface, - UINT object_count, ID3D12Pageable * const *objects) - { - FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", -@@ -3723,7 +3732,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device1 *iface, - UINT object_count, ID3D12Pageable * const *objects) - { - FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", -@@ -3732,10 +3741,10 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_device_Evict(ID3D12Device *iface, - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device1 *iface, - UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_fence *object; - HRESULT hr; - -@@ -3745,24 +3754,24 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, - if (FAILED(hr = d3d12_fence_create(device, initial_value, flags, &object))) - return hr; - -- return return_interface(&object->ID3D12Fence_iface, &IID_ID3D12Fence, riid, fence); -+ return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device *iface) -+static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device1 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p.\n", iface); - - return device->removed_reason; - } - --static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device1 *iface, - const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, - UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, - UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; - unsigned int width, height, depth, plane_count, sub_resources_per_plane; -@@ -3842,10 +3851,10 @@ static void STDMETHODCALLTYPE 
d3d12_device_GetCopyableFootprints(ID3D12Device *i - *total_bytes = total; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device1 *iface, - const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_query_heap *object; - HRESULT hr; - -@@ -3858,18 +3867,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *ifac - return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device *iface, BOOL enable) -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device1 *iface, BOOL enable) - { - FIXME("iface %p, enable %#x stub!\n", iface, enable); - - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device1 *iface, - const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, - REFIID iid, void **command_signature) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_command_signature *object; - HRESULT hr; - -@@ -3883,23 +3892,29 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic - &IID_ID3D12CommandSignature, iid, command_signature); - } - --static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device1 *iface, - ID3D12Resource *resource, UINT *total_tile_count, - D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, - UINT *sub_resource_tiling_count, 
UINT first_sub_resource_tiling, - D3D12_SUBRESOURCE_TILING *sub_resource_tilings) - { -- FIXME("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " -+ const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); -+ -+ TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " - "standard_title_shape %p, sub_resource_tiling_count %p, " -- "first_sub_resource_tiling %u, sub_resource_tilings %p stub!\n", -+ "first_sub_resource_tiling %u, sub_resource_tilings %p.\n", - iface, resource, total_tile_count, packed_mip_info, standard_tile_shape, - sub_resource_tiling_count, first_sub_resource_tiling, - sub_resource_tilings); -+ -+ d3d12_resource_get_tiling(device, resource_impl, total_tile_count, packed_mip_info, standard_tile_shape, -+ sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); - } - --static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, LUID *luid) -+static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device1 *iface, LUID *luid) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p, luid %p.\n", iface, luid); - -@@ -3908,7 +3923,33 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, - return luid; - } - --static const struct ID3D12DeviceVtbl d3d12_device_vtbl = -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device1 *iface, -+ const void *blob, SIZE_T blob_size, REFIID iid, void **lib) -+{ -+ FIXME("iface %p, blob %p, blob_size %lu, iid %s, lib %p stub!\n", iface, blob, blob_size, debugstr_guid(iid), lib); -+ -+ return DXGI_ERROR_UNSUPPORTED; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device1 *iface, -+ ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, -+ 
D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) -+{ -+ FIXME("iface %p, fences %p, values %p, fence_count %u, flags %#x, event %p stub!\n", -+ iface, fences, values, fence_count, flags, event); -+ -+ return E_NOTIMPL; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device1 *iface, -+ UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) -+{ -+ FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); -+ -+ return S_OK; -+} -+ -+static const struct ID3D12Device1Vtbl d3d12_device_vtbl = - { - /* IUnknown methods */ - d3d12_device_QueryInterface, -@@ -3957,14 +3998,18 @@ static const struct ID3D12DeviceVtbl d3d12_device_vtbl = - d3d12_device_CreateCommandSignature, - d3d12_device_GetResourceTiling, - d3d12_device_GetAdapterLuid, -+ /* ID3D12Device1 methods */ -+ d3d12_device_CreatePipelineLibrary, -+ d3d12_device_SetEventOnMultipleFenceCompletion, -+ d3d12_device_SetResidencyPriority, - }; - --struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface) -+struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface) - { - if (!iface) - return NULL; - assert(iface->lpVtbl == &d3d12_device_vtbl); -- return impl_from_ID3D12Device(iface); -+ return impl_from_ID3D12Device1(iface); - } - - static HRESULT d3d12_device_init(struct d3d12_device *device, -@@ -3973,7 +4018,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, - const struct vkd3d_vk_device_procs *vk_procs; - HRESULT hr; - -- device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; -+ device->ID3D12Device1_iface.lpVtbl = &d3d12_device_vtbl; - device->refcount = 1; - - vkd3d_instance_incref(device->vkd3d_instance = instance); -@@ -4170,28 +4215,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha - - IUnknown *vkd3d_get_device_parent(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = 
impl_from_ID3D12Device(device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); - - return d3d12_device->parent; - } - - VkDevice vkd3d_get_vk_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); - - return d3d12_device->vk_device; - } - - VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); - - return d3d12_device->vk_physical_device; - } - - struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); - - return d3d12_device->vkd3d_instance; - } -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index ea7b6859cc1..f3842958d96 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -779,6 +779,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, - VkImageFormatListCreateInfoKHR format_list; - const struct vkd3d_format *format; - VkImageCreateInfo image_info; -+ uint32_t count; - VkResult vr; - - if (resource) -@@ -914,6 +915,20 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, - if (resource && image_info.tiling == VK_IMAGE_TILING_LINEAR) - resource->flags |= VKD3D_RESOURCE_LINEAR_TILING; - -+ if (sparse_resource) -+ { -+ count = 0; -+ VK_CALL(vkGetPhysicalDeviceSparseImageFormatProperties(device->vk_physical_device, image_info.format, -+ image_info.imageType, image_info.samples, image_info.usage, image_info.tiling, &count, NULL)); -+ -+ if (!count) -+ { -+ FIXME("Sparse images are not supported with format %u, type %u, samples %u, usage 
%#x.\n", -+ image_info.format, image_info.imageType, image_info.samples, image_info.usage); -+ return E_INVALIDARG; -+ } -+ } -+ - if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, vk_image))) < 0) - WARN("Failed to create Vulkan image, vr %d.\n", vr); - -@@ -928,6 +943,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - D3D12_RESOURCE_DESC validated_desc; - VkMemoryRequirements requirements; - VkImage vk_image; -+ bool tiled; - HRESULT hr; - - assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); -@@ -940,8 +956,10 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - desc = &validated_desc; - } - -+ tiled = desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; -+ - /* XXX: We have to create an image to get its memory requirements. */ -- if (SUCCEEDED(hr = vkd3d_create_image(device, &heap_properties, 0, desc, NULL, &vk_image))) -+ if (SUCCEEDED(hr = vkd3d_create_image(device, tiled ? NULL : &heap_properties, 0, desc, NULL, &vk_image))) - { - VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &requirements)); - VK_CALL(vkDestroyImage(device->vk_device, vk_image, NULL)); -@@ -953,6 +971,11 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - return hr; - } - -+static void d3d12_resource_tile_info_cleanup(struct d3d12_resource *resource) -+{ -+ vkd3d_free(resource->tiles.subresources); -+} -+ - static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12_device *device) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -@@ -968,6 +991,8 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12 - else - VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL)); - -+ d3d12_resource_tile_info_cleanup(resource); -+ - if (resource->heap) - d3d12_heap_resource_destroyed(resource->heap); - } -@@ -1039,12 +1064,196 @@ static void d3d12_resource_get_level_box(const struct 
d3d12_resource *resource, - box->back = d3d12_resource_desc_get_depth(&resource->desc, level); - } - --/* ID3D12Resource */ --static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) -+static void compute_image_subresource_size_in_tiles(const VkExtent3D *tile_extent, -+ const struct D3D12_RESOURCE_DESC *desc, unsigned int miplevel_idx, -+ struct vkd3d_tiled_region_extent *size) - { -- return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); -+ unsigned int width, height, depth; -+ -+ width = d3d12_resource_desc_get_width(desc, miplevel_idx); -+ height = d3d12_resource_desc_get_height(desc, miplevel_idx); -+ depth = d3d12_resource_desc_get_depth(desc, miplevel_idx); -+ size->width = (width + tile_extent->width - 1) / tile_extent->width; -+ size->height = (height + tile_extent->height - 1) / tile_extent->height; -+ size->depth = (depth + tile_extent->depth - 1) / tile_extent->depth; -+} -+ -+void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, -+ UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, -+ UINT *subresource_tiling_count, UINT first_subresource_tiling, -+ D3D12_SUBRESOURCE_TILING *subresource_tilings) -+{ -+ unsigned int i, subresource, subresource_count, miplevel_idx, count; -+ const struct vkd3d_subresource_tile_info *tile_info; -+ const VkExtent3D *tile_extent; -+ -+ tile_extent = &resource->tiles.tile_extent; -+ -+ if (packed_mip_info) -+ { -+ packed_mip_info->NumStandardMips = resource->tiles.standard_mip_count; -+ packed_mip_info->NumPackedMips = resource->desc.MipLevels - packed_mip_info->NumStandardMips; -+ packed_mip_info->NumTilesForPackedMips = !!resource->tiles.packed_mip_tile_count; /* non-zero dummy value */ -+ packed_mip_info->StartTileIndexInOverallResource = packed_mip_info->NumPackedMips -+ ? 
resource->tiles.subresources[resource->tiles.standard_mip_count].offset : 0; -+ } -+ -+ if (standard_tile_shape) -+ { -+ /* D3D12 docs say tile shape is cleared to zero if there is no standard mip, but drivers don't to do this. */ -+ standard_tile_shape->WidthInTexels = tile_extent->width; -+ standard_tile_shape->HeightInTexels = tile_extent->height; -+ standard_tile_shape->DepthInTexels = tile_extent->depth; -+ } -+ -+ if (total_tile_count) -+ *total_tile_count = resource->tiles.total_count; -+ -+ if (!subresource_tiling_count) -+ return; -+ -+ subresource_count = resource->tiles.subresource_count; -+ -+ count = subresource_count - min(first_subresource_tiling, subresource_count); -+ count = min(count, *subresource_tiling_count); -+ -+ for (i = 0; i < count; ++i) -+ { -+ subresource = i + first_subresource_tiling; -+ miplevel_idx = subresource % resource->desc.MipLevels; -+ if (miplevel_idx >= resource->tiles.standard_mip_count) -+ { -+ memset(&subresource_tilings[i], 0, sizeof(subresource_tilings[i])); -+ subresource_tilings[i].StartTileIndexInOverallResource = D3D12_PACKED_TILE; -+ continue; -+ } -+ -+ tile_info = &resource->tiles.subresources[subresource]; -+ subresource_tilings[i].StartTileIndexInOverallResource = tile_info->offset; -+ subresource_tilings[i].WidthInTiles = tile_info->extent.width; -+ subresource_tilings[i].HeightInTiles = tile_info->extent.height; -+ subresource_tilings[i].DepthInTiles = tile_info->extent.depth; -+ } -+ *subresource_tiling_count = i; - } - -+static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3d12_device *device) -+{ -+ unsigned int i, start_idx, subresource_count, tile_count, miplevel_idx; -+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -+ VkSparseImageMemoryRequirements *sparse_requirements_array; -+ VkSparseImageMemoryRequirements sparse_requirements = {0}; -+ struct vkd3d_subresource_tile_info *tile_info; -+ VkMemoryRequirements requirements; -+ const VkExtent3D *tile_extent; 
-+ uint32_t requirement_count; -+ -+ subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); -+ -+ if (!(resource->tiles.subresources = vkd3d_calloc(subresource_count, sizeof(*resource->tiles.subresources)))) -+ { -+ ERR("Failed to allocate subresource info array.\n"); -+ return false; -+ } -+ -+ if (d3d12_resource_is_buffer(resource)) -+ { -+ assert(subresource_count == 1); -+ -+ VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, resource->u.vk_buffer, &requirements)); -+ if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) -+ FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); -+ -+ tile_info = &resource->tiles.subresources[0]; -+ tile_info->offset = 0; -+ tile_info->extent.width = align(resource->desc.Width, D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) -+ / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; -+ tile_info->extent.height = 1; -+ tile_info->extent.depth = 1; -+ tile_info->count = tile_info->extent.width; -+ -+ resource->tiles.tile_extent.width = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; -+ resource->tiles.tile_extent.height = 1; -+ resource->tiles.tile_extent.depth = 1; -+ resource->tiles.total_count = tile_info->extent.width; -+ resource->tiles.subresource_count = 1; -+ resource->tiles.standard_mip_count = 1; -+ resource->tiles.packed_mip_tile_count = 0; -+ } -+ else -+ { -+ VK_CALL(vkGetImageMemoryRequirements(device->vk_device, resource->u.vk_image, &requirements)); -+ if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) -+ FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); -+ -+ requirement_count = 0; -+ VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, &requirement_count, NULL)); -+ if (!(sparse_requirements_array = vkd3d_calloc(requirement_count, sizeof(*sparse_requirements_array)))) -+ { -+ ERR("Failed to allocate sparse requirements array.\n"); -+ return false; -+ } -+ 
VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, -+ &requirement_count, sparse_requirements_array)); -+ -+ for (i = 0; i < requirement_count; ++i) -+ { -+ if (sparse_requirements_array[i].formatProperties.aspectMask & resource->format->vk_aspect_mask) -+ { -+ if (sparse_requirements.formatProperties.aspectMask) -+ { -+ WARN("Ignoring properties for aspect mask %#x.\n", -+ sparse_requirements_array[i].formatProperties.aspectMask); -+ } -+ else -+ { -+ sparse_requirements = sparse_requirements_array[i]; -+ } -+ } -+ } -+ vkd3d_free(sparse_requirements_array); -+ if (!sparse_requirements.formatProperties.aspectMask) -+ { -+ WARN("Failed to get sparse requirements.\n"); -+ return false; -+ } -+ -+ resource->tiles.tile_extent = sparse_requirements.formatProperties.imageGranularity; -+ resource->tiles.subresource_count = subresource_count; -+ resource->tiles.standard_mip_count = sparse_requirements.imageMipTailSize -+ ? sparse_requirements.imageMipTailFirstLod : resource->desc.MipLevels; -+ resource->tiles.packed_mip_tile_count = (resource->tiles.standard_mip_count < resource->desc.MipLevels) -+ ? 
sparse_requirements.imageMipTailSize / requirements.alignment : 0; -+ -+ for (i = 0, start_idx = 0; i < subresource_count; ++i) -+ { -+ miplevel_idx = i % resource->desc.MipLevels; -+ -+ tile_extent = &sparse_requirements.formatProperties.imageGranularity; -+ tile_info = &resource->tiles.subresources[i]; -+ compute_image_subresource_size_in_tiles(tile_extent, &resource->desc, miplevel_idx, &tile_info->extent); -+ tile_info->offset = start_idx; -+ tile_info->count = 0; -+ -+ if (miplevel_idx < resource->tiles.standard_mip_count) -+ { -+ tile_count = tile_info->extent.width * tile_info->extent.height * tile_info->extent.depth; -+ start_idx += tile_count; -+ tile_info->count = tile_count; -+ } -+ else if (miplevel_idx == resource->tiles.standard_mip_count) -+ { -+ tile_info->count = 1; /* Non-zero dummy value */ -+ start_idx += 1; -+ } -+ } -+ resource->tiles.total_count = start_idx; -+ } -+ -+ return true; -+} -+ -+/* ID3D12Resource */ - static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource *iface, - REFIID riid, void **object) - { -@@ -1661,6 +1870,21 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d - return E_INVALIDARG; - } - -+ if (desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE) -+ { -+ if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D && !device->vk_info.sparse_residency_3d) -+ { -+ WARN("The device does not support tiled 3D images.\n"); -+ return E_INVALIDARG; -+ } -+ if (format->plane_count > 1) -+ { -+ WARN("Invalid format %#x. 
D3D12 does not support multiplanar formats for tiled resources.\n", -+ format->dxgi_format); -+ return E_INVALIDARG; -+ } -+ } -+ - if (!d3d12_resource_validate_texture_format(desc, format) - || !d3d12_resource_validate_texture_alignment(desc, format)) - return E_INVALIDARG; -@@ -1722,6 +1946,12 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 - - resource->desc = *desc; - -+ if (!heap_properties && !device->vk_info.sparse_binding) -+ { -+ WARN("The device does not support tiled images.\n"); -+ return E_INVALIDARG; -+ } -+ - if (heap_properties && !d3d12_resource_validate_heap_properties(resource, heap_properties, initial_state)) - return E_INVALIDARG; - -@@ -1787,6 +2017,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 - resource->heap = NULL; - resource->heap_offset = 0; - -+ memset(&resource->tiles, 0, sizeof(resource->tiles)); -+ - if (FAILED(hr = vkd3d_private_store_init(&resource->private_store))) - { - d3d12_resource_destroy(resource, device); -@@ -1972,6 +2204,12 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, - desc, initial_state, optimized_clear_value, &object))) - return hr; - -+ if (!d3d12_resource_init_tiles(object, device)) -+ { -+ d3d12_resource_Release(&object->ID3D12Resource_iface); -+ return E_OUTOFMEMORY; -+ } -+ - TRACE("Created reserved resource %p.\n", object); - - *resource = object; -@@ -1982,7 +2220,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, - HRESULT vkd3d_create_image_resource(ID3D12Device *device, - const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) - { -- struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device(device); -+ struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device1((ID3D12Device1 *)device); - struct d3d12_resource *object; - HRESULT hr; - -@@ -2044,38 +2282,67 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource) - return 
d3d12_resource_decref(impl_from_ID3D12Resource(resource)); - } - --/* Objects are cached so that vkd3d_view_incref() can safely check the refcount -- * of an object freed by another thread. */ -+#define HEAD_INDEX_MASK (ARRAY_SIZE(cache->heads) - 1) -+ -+/* Objects are cached so that vkd3d_view_incref() can safely check the refcount of an -+ * object freed by another thread. This could be implemented as a single atomic linked -+ * list, but it requires handling the ABA problem, which brings issues with cross-platform -+ * support, compiler support, and non-universal x86-64 support for 128-bit CAS. */ - static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) - { - union d3d12_desc_object u; -- void *next; -+ unsigned int i; - -- do -+ STATIC_ASSERT(!(ARRAY_SIZE(cache->heads) & HEAD_INDEX_MASK)); -+ -+ i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK; -+ for (;;) - { -- u.object = cache->head; -- if (!u.object) -+ if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) -+ { -+ if ((u.object = cache->heads[i].head)) -+ { -+ vkd3d_atomic_decrement(&cache->free_count); -+ cache->heads[i].head = u.header->next; -+ vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); -+ return u.object; -+ } -+ vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); -+ } -+ /* Keeping a free count avoids uncertainty over when this loop should terminate, -+ * which could result in excess allocations gradually increasing without limit. 
*/ -+ if (cache->free_count < ARRAY_SIZE(cache->heads)) - return vkd3d_malloc(cache->size); -- next = u.header->next; -- } -- while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, u.object, next)); - -- return u.object; -+ i = (i + 1) & HEAD_INDEX_MASK; -+ } - } - - static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object) - { - union d3d12_desc_object u = {object}; -+ unsigned int i; - void *head; - -- do -+ /* Using the same index as above may result in a somewhat uneven distribution, -+ * but the main objective is to avoid costly spinlock contention. */ -+ i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK; -+ for (;;) - { -- head = cache->head; -- u.header->next = head; -+ if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) -+ break; -+ i = (i + 1) & HEAD_INDEX_MASK; - } -- while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, head, u.object)); -+ -+ head = cache->heads[i].head; -+ u.header->next = head; -+ cache->heads[i].head = u.object; -+ vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); -+ vkd3d_atomic_increment(&cache->free_count); - } - -+#undef HEAD_INDEX_MASK -+ - static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device) - { - struct vkd3d_cbuffer_desc *desc; -@@ -2368,13 +2635,11 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descr - descriptor_writes_free_object_refs(&writes, device); - } - --static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) -+static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst, struct d3d12_descriptor_heap *descriptor_heap) - { -- struct d3d12_descriptor_heap *descriptor_heap; - unsigned int i, head; - - i = dst->index; -- descriptor_heap = d3d12_desc_get_descriptor_heap(dst); - head = descriptor_heap->dirty_list_head; - - /* Only one thread can swap the value away from zero. 
*/ -@@ -2388,14 +2653,20 @@ static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) - } - } - --void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, -- struct d3d12_device *device) -+static inline void descriptor_heap_write_atomic(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_desc *dst, -+ const struct d3d12_desc *src, struct d3d12_device *device) - { - void *object = src->s.u.object; - - d3d12_desc_replace(dst, object, device); -- if (device->use_vk_heaps && object && !dst->next) -- d3d12_desc_mark_as_modified(dst); -+ if (descriptor_heap->use_vk_heaps && object && !dst->next) -+ d3d12_desc_mark_as_modified(dst, descriptor_heap); -+} -+ -+void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, -+ struct d3d12_device *device) -+{ -+ descriptor_heap_write_atomic(d3d12_desc_get_descriptor_heap(dst), dst, src, device); - } - - static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) -@@ -2403,7 +2674,9 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_devic - d3d12_desc_replace(descriptor, NULL, device); - } - --void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, -+/* This is a major performance bottleneck for some games, so do not load the device -+ * pointer from dst_heap. In some cases device will not be used. 
*/ -+void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, - struct d3d12_device *device) - { - struct d3d12_desc tmp; -@@ -2411,7 +2684,7 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, - assert(dst != src); - - tmp.s.u.object = d3d12_desc_get_object_ref(src, device); -- d3d12_desc_write_atomic(dst, &tmp, device); -+ descriptor_heap_write_atomic(dst_heap, dst, &tmp, device); - } - - static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12_device *device, -@@ -3810,7 +4083,15 @@ static D3D12_GPU_DESCRIPTOR_HANDLE * STDMETHODCALLTYPE d3d12_descriptor_heap_Get - - TRACE("iface %p, descriptor %p.\n", iface, descriptor); - -- descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; -+ if (heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) -+ { -+ descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; -+ } -+ else -+ { -+ WARN("Heap %p is not shader-visible.\n", iface); -+ descriptor->ptr = 0; -+ } - - return descriptor; - } -@@ -3913,7 +4194,7 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri - descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; - memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets)); - -- if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV -+ if (!descriptor_heap->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV - && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) - return S_OK; - -@@ -3944,6 +4225,7 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript - if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store))) - return hr; - -+ descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); - d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); - 
vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); - -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 5e46b467252..7ae46c862cc 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -20,6 +20,7 @@ - - #include "vkd3d_private.h" - #include "vkd3d_shaders.h" -+#include "vkd3d_shader_utils.h" - - /* ID3D12RootSignature */ - static inline struct d3d12_root_signature *impl_from_ID3D12RootSignature(ID3D12RootSignature *iface) -@@ -374,8 +375,8 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig - - if (unbounded && range->OffsetInDescriptorsFromTableStart == D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) - { -- WARN("An unbounded range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " -- "another unbounded range.\n"); -+ WARN("A range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " -+ "an unbounded range.\n"); - return E_INVALIDARG; - } - -@@ -1978,14 +1979,14 @@ static HRESULT create_shader_stage(struct d3d12_device *device, - compile_info.next = shader_interface; - compile_info.source.code = code->pShaderBytecode; - compile_info.source.size = code->BytecodeLength; -- compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; - compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; - compile_info.options = options; - compile_info.option_count = ARRAY_SIZE(options); - compile_info.log_level = VKD3D_SHADER_LOG_NONE; - compile_info.source_name = NULL; - -- if ((ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) -+ if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0 -+ || (ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) - { - WARN("Failed to compile shader, vkd3d result %d.\n", ret); - return hresult_from_vkd3d_result(ret); -@@ -2008,6 +2009,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER - struct vkd3d_shader_scan_descriptor_info 
*descriptor_info) - { - struct vkd3d_shader_compile_info compile_info; -+ enum vkd3d_result ret; - - const struct vkd3d_shader_compile_option options[] = - { -@@ -2019,13 +2021,15 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER - compile_info.next = descriptor_info; - compile_info.source.code = code->pShaderBytecode; - compile_info.source.size = code->BytecodeLength; -- compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; - compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; - compile_info.options = options; - compile_info.option_count = ARRAY_SIZE(options); - compile_info.log_level = VKD3D_SHADER_LOG_NONE; - compile_info.source_name = NULL; - -+ if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0) -+ return ret; -+ - return vkd3d_shader_scan(&compile_info, NULL); - } - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -index 88301fbb313..159560afd8e 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -@@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, - - if (!device) - { -- ID3D12Device_Release(&object->ID3D12Device_iface); -+ ID3D12Device_Release(&object->ID3D12Device1_iface); - return S_FALSE; - } - -- return return_interface(&object->ID3D12Device_iface, &IID_ID3D12Device, iid, device); -+ return return_interface(&object->ID3D12Device1_iface, &IID_ID3D12Device, iid, device); - } - - /* ID3D12RootSignatureDeserializer */ -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index b0150754434..a18287b4cd4 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -147,9 +147,12 @@ struct vkd3d_vulkan_info - unsigned int max_vertex_attrib_divisor; - - VkPhysicalDeviceLimits device_limits; -- VkPhysicalDeviceSparseProperties sparse_properties; - struct 
vkd3d_device_descriptor_limits descriptor_limits; - -+ VkPhysicalDeviceSparseProperties sparse_properties; -+ bool sparse_binding; -+ bool sparse_residency_3d; -+ - VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; - - unsigned int shader_extension_count; -@@ -250,6 +253,11 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) - { - } - -+static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) -+{ -+ return InterlockedIncrement((LONG volatile *)x); -+} -+ - static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) - { - return InterlockedDecrement((LONG volatile *)x); -@@ -384,6 +392,15 @@ static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) - } - # else - # error "vkd3d_atomic_decrement() not implemented for this platform" -+# endif /* HAVE_SYNC_SUB_AND_FETCH */ -+ -+# if HAVE_SYNC_ADD_AND_FETCH -+static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) -+{ -+ return __sync_add_and_fetch(x, 1); -+} -+# else -+# error "vkd3d_atomic_increment() not implemented for this platform" - # endif /* HAVE_SYNC_ADD_AND_FETCH */ - - # if HAVE_SYNC_BOOL_COMPARE_AND_SWAP -@@ -599,10 +616,12 @@ struct vkd3d_signaled_semaphore - /* ID3D12Fence */ - struct d3d12_fence - { -- ID3D12Fence ID3D12Fence_iface; -+ ID3D12Fence1 ID3D12Fence1_iface; - LONG internal_refcount; - LONG refcount; - -+ D3D12_FENCE_FLAGS flags; -+ - uint64_t value; - uint64_t max_pending_value; - struct vkd3d_mutex mutex; -@@ -670,6 +689,30 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface); - #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 - #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010 - -+struct vkd3d_tiled_region_extent -+{ -+ unsigned int width; -+ unsigned int height; -+ unsigned int depth; -+}; -+ -+struct vkd3d_subresource_tile_info -+{ -+ unsigned int offset; -+ unsigned int count; -+ struct vkd3d_tiled_region_extent extent; -+}; -+ -+struct 
d3d12_resource_tile_info -+{ -+ VkExtent3D tile_extent; -+ unsigned int total_count; -+ unsigned int standard_mip_count; -+ unsigned int packed_mip_tile_count; -+ unsigned int subresource_count; -+ struct vkd3d_subresource_tile_info *subresources; -+}; -+ - /* ID3D12Resource */ - struct d3d12_resource - { -@@ -698,9 +741,16 @@ struct d3d12_resource - - struct d3d12_device *device; - -+ struct d3d12_resource_tile_info tiles; -+ - struct vkd3d_private_store private_store; - }; - -+static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) -+{ -+ return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); -+} -+ - static inline bool d3d12_resource_is_buffer(const struct d3d12_resource *resource) - { - return resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER; -@@ -713,6 +763,10 @@ static inline bool d3d12_resource_is_texture(const struct d3d12_resource *resour - - bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource); - HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d12_device *device); -+void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, -+ UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, -+ UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, -+ D3D12_SUBRESOURCE_TILING *sub_resource_tilings); - - HRESULT d3d12_committed_resource_create(struct d3d12_device *device, - const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, -@@ -853,8 +907,9 @@ static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc * - { - do - { -- view = src->s.u.object; -- } while (view && !vkd3d_view_incref(view)); -+ if (!(view = src->s.u.object)) -+ return NULL; -+ } while (!vkd3d_view_incref(view)); - - /* Check if the object is still in src to handle the case where it was - * already freed and reused elsewhere when the refcount was 
incremented. */ -@@ -880,7 +935,10 @@ static inline void d3d12_desc_copy_raw(struct d3d12_desc *dst, const struct d3d1 - dst->s = src->s; - } - --void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); -+struct d3d12_descriptor_heap; -+ -+void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, -+ struct d3d12_device *device); - void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, - struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc); - void d3d12_desc_create_srv(struct d3d12_desc *descriptor, -@@ -983,6 +1041,7 @@ struct d3d12_descriptor_heap - D3D12_DESCRIPTOR_HEAP_DESC desc; - - struct d3d12_device *device; -+ bool use_vk_heaps; - - struct vkd3d_private_store private_store; - -@@ -1367,7 +1426,7 @@ enum vkd3d_pipeline_bind_point - /* ID3D12CommandList */ - struct d3d12_command_list - { -- ID3D12GraphicsCommandList2 ID3D12GraphicsCommandList2_iface; -+ ID3D12GraphicsCommandList3 ID3D12GraphicsCommandList3_iface; - LONG refcount; - - D3D12_COMMAND_LIST_TYPE type; -@@ -1454,6 +1513,8 @@ enum vkd3d_cs_op - VKD3D_CS_OP_WAIT, - VKD3D_CS_OP_SIGNAL, - VKD3D_CS_OP_EXECUTE, -+ VKD3D_CS_OP_UPDATE_MAPPINGS, -+ VKD3D_CS_OP_COPY_MAPPINGS, - }; - - struct vkd3d_cs_wait -@@ -1474,6 +1535,30 @@ struct vkd3d_cs_execute - unsigned int buffer_count; - }; - -+struct vkd3d_cs_update_mappings -+{ -+ struct d3d12_resource *resource; -+ struct d3d12_heap *heap; -+ D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates; -+ D3D12_TILE_REGION_SIZE *region_sizes; -+ D3D12_TILE_RANGE_FLAGS *range_flags; -+ UINT *heap_range_offsets; -+ UINT *range_tile_counts; -+ UINT region_count; -+ UINT range_count; -+ D3D12_TILE_MAPPING_FLAGS flags; -+}; -+ -+struct vkd3d_cs_copy_mappings -+{ -+ struct d3d12_resource *dst_resource; -+ struct d3d12_resource *src_resource; -+ D3D12_TILED_RESOURCE_COORDINATE dst_region_start_coordinate; -+ D3D12_TILED_RESOURCE_COORDINATE 
src_region_start_coordinate; -+ D3D12_TILE_REGION_SIZE region_size; -+ D3D12_TILE_MAPPING_FLAGS flags; -+}; -+ - struct vkd3d_cs_op_data - { - enum vkd3d_cs_op opcode; -@@ -1482,6 +1567,8 @@ struct vkd3d_cs_op_data - struct vkd3d_cs_wait wait; - struct vkd3d_cs_signal signal; - struct vkd3d_cs_execute execute; -+ struct vkd3d_cs_update_mappings update_mappings; -+ struct vkd3d_cs_copy_mappings copy_mappings; - } u; - }; - -@@ -1519,6 +1606,8 @@ struct d3d12_command_queue - * set, aux_op_queue.count must be zero. */ - struct d3d12_command_queue_op_array aux_op_queue; - -+ bool supports_sparse_binding; -+ - struct vkd3d_private_store private_store; - }; - -@@ -1530,6 +1619,7 @@ struct d3d12_command_signature - { - ID3D12CommandSignature ID3D12CommandSignature_iface; - LONG refcount; -+ unsigned int internal_refcount; - - D3D12_COMMAND_SIGNATURE_DESC desc; - -@@ -1600,9 +1690,17 @@ struct vkd3d_uav_clear_state - HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); - void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); - -+struct desc_object_cache_head -+{ -+ void *head; -+ unsigned int spinlock; -+}; -+ - struct vkd3d_desc_object_cache - { -- void * volatile head; -+ struct desc_object_cache_head heads[16]; -+ unsigned int next_index; -+ unsigned int free_count; - size_t size; - }; - -@@ -1611,7 +1709,7 @@ struct vkd3d_desc_object_cache - /* ID3D12Device */ - struct d3d12_device - { -- ID3D12Device ID3D12Device_iface; -+ ID3D12Device1 ID3D12Device1_iface; - LONG refcount; - - VkDevice vk_device; -@@ -1677,27 +1775,27 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 - bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); - void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, - const char *message, ...) 
VKD3D_PRINTF_FUNC(3, 4); --struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface); -+struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface); - - static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) - { -- return ID3D12Device_QueryInterface(&device->ID3D12Device_iface, iid, object); -+ return ID3D12Device1_QueryInterface(&device->ID3D12Device1_iface, iid, object); - } - - static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) - { -- return ID3D12Device_AddRef(&device->ID3D12Device_iface); -+ return ID3D12Device1_AddRef(&device->ID3D12Device1_iface); - } - - static inline ULONG d3d12_device_release(struct d3d12_device *device) - { -- return ID3D12Device_Release(&device->ID3D12Device_iface); -+ return ID3D12Device1_Release(&device->ID3D12Device1_iface); - } - - static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) - { -- return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); -+ return ID3D12Device1_GetDescriptorHandleIncrementSize(&device->ID3D12Device1_iface, descriptor_type); - } - - /* utils */ --- -2.40.1 -