From d2cf83298d79e1ffe2d6c06079157e5ec5c0cd39 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Sun, 24 Sep 2023 08:30:07 +1000 Subject: [PATCH] Rebase against 56e58cbed19bb5307598d4f433d5b3f01af91a1b. --- ...-90d4529f27b477bf54e3a8657db2fa78c3a.patch | 43604 ---------------- ...-ca05e57e67306e9b97eb22a35cd77728e3e.patch | 3862 ++ staging/upstream-commit | 2 +- 3 files changed, 3863 insertions(+), 43605 deletions(-) delete mode 100644 patches/vkd3d-latest/0001-Updated-vkd3d-to-90d4529f27b477bf54e3a8657db2fa78c3a.patch create mode 100644 patches/vkd3d-latest/0001-Updated-vkd3d-to-ca05e57e67306e9b97eb22a35cd77728e3e.patch diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-90d4529f27b477bf54e3a8657db2fa78c3a.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-90d4529f27b477bf54e3a8657db2fa78c3a.patch deleted file mode 100644 index c6aa9508..00000000 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-90d4529f27b477bf54e3a8657db2fa78c3a.patch +++ /dev/null @@ -1,43604 +0,0 @@ -From eaf7c2d83d21cbd5ecfa40ebe1eb9a92c1c323ad Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 17 May 2023 08:35:40 +1000 -Subject: [PATCH] Updated vkd3d to 90d4529f27b477bf54e3a8657db2fa78c3af5eec - (1.9) - ---- - libs/vkd3d/Makefile.in | 7 +- - libs/vkd3d/include/list.h | 270 + - libs/vkd3d/include/private/list.h | 270 + - libs/vkd3d/include/private/rbtree.h | 378 ++ - libs/vkd3d/include/private/vkd3d_common.h | 25 +- - libs/vkd3d/include/private/vkd3d_debug.h | 2 +- - .../include/private/vkd3d_shader_utils.h | 67 + - libs/vkd3d/include/private/vkd3d_test.h | 432 ++ - libs/vkd3d/include/vkd3d.h | 37 + - libs/vkd3d/include/vkd3d_d3d9types.h | 237 + - libs/vkd3d/include/vkd3d_d3dcompiler.h | 74 + - libs/vkd3d/include/vkd3d_shader.h | 307 +- - libs/vkd3d/include/vkd3d_utils.h | 108 + - libs/vkd3d/include/vkd3d_windows.h | 289 + - libs/vkd3d/libs/vkd3d-common/blob.c | 1 + - libs/vkd3d/libs/vkd3d-common/debug.c | 21 +- - .../libs/vkd3d-shader/{trace.c => d3d_asm.c} | 75 
+- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1521 ++++- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 1813 +----- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 2968 +++++++++ - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 896 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 295 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 8 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 3815 ++++++----- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 2391 +++++-- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 799 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c | 980 --- - libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c | 2531 -------- - libs/vkd3d/libs/vkd3d-shader/ir.c | 1294 ++++ - libs/vkd3d/libs/vkd3d-shader/preproc.h | 2 +- - libs/vkd3d/libs/vkd3d-shader/preproc.l | 146 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 1720 ++--- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 5619 +++++++++++++++++ - .../libs/vkd3d-shader/vkd3d_shader_main.c | 601 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 207 +- - libs/vkd3d/libs/vkd3d/command.c | 810 ++- - libs/vkd3d/libs/vkd3d/device.c | 440 +- - libs/vkd3d/libs/vkd3d/resource.c | 1235 ++-- - libs/vkd3d/libs/vkd3d/state.c | 18 +- - libs/vkd3d/libs/vkd3d/vkd3d_main.c | 4 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 356 +- - 41 files changed, 22867 insertions(+), 10202 deletions(-) - create mode 100644 libs/vkd3d/include/list.h - create mode 100644 libs/vkd3d/include/private/list.h - create mode 100644 libs/vkd3d/include/private/rbtree.h - create mode 100644 libs/vkd3d/include/private/vkd3d_shader_utils.h - create mode 100644 libs/vkd3d/include/private/vkd3d_test.h - create mode 100644 libs/vkd3d/include/vkd3d_d3d9types.h - create mode 100644 libs/vkd3d/include/vkd3d_d3dcompiler.h - create mode 100644 libs/vkd3d/include/vkd3d_utils.h - create mode 100644 libs/vkd3d/include/vkd3d_windows.h - rename libs/vkd3d/libs/vkd3d-shader/{trace.c => d3d_asm.c} (97%) - create mode 100644 libs/vkd3d/libs/vkd3d-shader/dxil.c - delete mode 100644 libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c - delete mode 
100644 libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c - create mode 100644 libs/vkd3d/libs/vkd3d-shader/ir.c - create mode 100644 libs/vkd3d/libs/vkd3d-shader/tpf.c - -diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in -index 0ed4e27ad83..f647af11d07 100644 ---- a/libs/vkd3d/Makefile.in -+++ b/libs/vkd3d/Makefile.in -@@ -14,20 +14,21 @@ SOURCES = \ - libs/vkd3d-common/memory.c \ - libs/vkd3d-common/utf8.c \ - libs/vkd3d-shader/checksum.c \ -+ libs/vkd3d-shader/d3d_asm.c \ - libs/vkd3d-shader/d3dbc.c \ - libs/vkd3d-shader/dxbc.c \ -+ libs/vkd3d-shader/dxil.c \ - libs/vkd3d-shader/glsl.c \ - libs/vkd3d-shader/hlsl.c \ - libs/vkd3d-shader/hlsl.l \ - libs/vkd3d-shader/hlsl.y \ - libs/vkd3d-shader/hlsl_codegen.c \ - libs/vkd3d-shader/hlsl_constant_ops.c \ -- libs/vkd3d-shader/hlsl_sm1.c \ -- libs/vkd3d-shader/hlsl_sm4.c \ -+ libs/vkd3d-shader/ir.c \ - libs/vkd3d-shader/preproc.l \ - libs/vkd3d-shader/preproc.y \ - libs/vkd3d-shader/spirv.c \ -- libs/vkd3d-shader/trace.c \ -+ libs/vkd3d-shader/tpf.c \ - libs/vkd3d-shader/vkd3d_shader_main.c \ - libs/vkd3d/command.c \ - libs/vkd3d/device.c \ -diff --git a/libs/vkd3d/include/list.h b/libs/vkd3d/include/list.h -new file mode 100644 -index 00000000000..2e1d95f3fd4 ---- /dev/null -+++ b/libs/vkd3d/include/list.h -@@ -0,0 +1,270 @@ -+/* -+ * Linked lists support -+ * -+ * Copyright (C) 2002 Alexandre Julliard -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#ifndef __WINE_SERVER_LIST_H -+#define __WINE_SERVER_LIST_H -+ -+#include -+ -+struct list -+{ -+ struct list *next; -+ struct list *prev; -+}; -+ -+/* Define a list like so: -+ * -+ * struct gadget -+ * { -+ * struct list entry; <-- doesn't have to be the first item in the struct -+ * int a, b; -+ * }; -+ * -+ * static struct list global_gadgets = LIST_INIT( global_gadgets ); -+ * -+ * or -+ * -+ * struct some_global_thing -+ * { -+ * struct list gadgets; -+ * }; -+ * -+ * list_init( &some_global_thing->gadgets ); -+ * -+ * Manipulate it like this: -+ * -+ * list_add_head( &global_gadgets, &new_gadget->entry ); -+ * list_remove( &new_gadget->entry ); -+ * list_add_after( &some_random_gadget->entry, &new_gadget->entry ); -+ * -+ * And to iterate over it: -+ * -+ * struct gadget *gadget; -+ * LIST_FOR_EACH_ENTRY( gadget, &global_gadgets, struct gadget, entry ) -+ * { -+ * ... 
-+ * } -+ * -+ */ -+ -+/* add an element after the specified one */ -+static inline void list_add_after( struct list *elem, struct list *to_add ) -+{ -+ to_add->next = elem->next; -+ to_add->prev = elem; -+ elem->next->prev = to_add; -+ elem->next = to_add; -+} -+ -+/* add an element before the specified one */ -+static inline void list_add_before( struct list *elem, struct list *to_add ) -+{ -+ to_add->next = elem; -+ to_add->prev = elem->prev; -+ elem->prev->next = to_add; -+ elem->prev = to_add; -+} -+ -+/* add element at the head of the list */ -+static inline void list_add_head( struct list *list, struct list *elem ) -+{ -+ list_add_after( list, elem ); -+} -+ -+/* add element at the tail of the list */ -+static inline void list_add_tail( struct list *list, struct list *elem ) -+{ -+ list_add_before( list, elem ); -+} -+ -+/* remove an element from its list */ -+static inline void list_remove( struct list *elem ) -+{ -+ elem->next->prev = elem->prev; -+ elem->prev->next = elem->next; -+} -+ -+/* get the next element */ -+static inline struct list *list_next( const struct list *list, const struct list *elem ) -+{ -+ struct list *ret = elem->next; -+ if (elem->next == list) ret = NULL; -+ return ret; -+} -+ -+/* get the previous element */ -+static inline struct list *list_prev( const struct list *list, const struct list *elem ) -+{ -+ struct list *ret = elem->prev; -+ if (elem->prev == list) ret = NULL; -+ return ret; -+} -+ -+/* get the first element */ -+static inline struct list *list_head( const struct list *list ) -+{ -+ return list_next( list, list ); -+} -+ -+/* get the last element */ -+static inline struct list *list_tail( const struct list *list ) -+{ -+ return list_prev( list, list ); -+} -+ -+/* check if a list is empty */ -+static inline int list_empty( const struct list *list ) -+{ -+ return list->next == list; -+} -+ -+/* initialize a list */ -+static inline void list_init( struct list *list ) -+{ -+ list->next = list->prev = list; -+} -+ -+/* 
count the elements of a list */ -+static inline unsigned int list_count( const struct list *list ) -+{ -+ unsigned count = 0; -+ const struct list *ptr; -+ for (ptr = list->next; ptr != list; ptr = ptr->next) count++; -+ return count; -+} -+ -+/* move all elements from src to before the specified element */ -+static inline void list_move_before( struct list *dst, struct list *src ) -+{ -+ if (list_empty(src)) return; -+ -+ dst->prev->next = src->next; -+ src->next->prev = dst->prev; -+ dst->prev = src->prev; -+ src->prev->next = dst; -+ list_init(src); -+} -+ -+/* move all elements from src to after the specified element */ -+static inline void list_move_after( struct list *dst, struct list *src ) -+{ -+ if (list_empty(src)) return; -+ -+ dst->next->prev = src->prev; -+ src->prev->next = dst->next; -+ dst->next = src->next; -+ src->next->prev = dst; -+ list_init(src); -+} -+ -+/* move all elements from src to the head of dst */ -+static inline void list_move_head( struct list *dst, struct list *src ) -+{ -+ list_move_after( dst, src ); -+} -+ -+/* move all elements from src to the tail of dst */ -+static inline void list_move_tail( struct list *dst, struct list *src ) -+{ -+ list_move_before( dst, src ); -+} -+ -+/* move the slice of elements from begin to end inclusive to the head of dst */ -+static inline void list_move_slice_head( struct list *dst, struct list *begin, struct list *end ) -+{ -+ struct list *dst_next = dst->next; -+ begin->prev->next = end->next; -+ end->next->prev = begin->prev; -+ dst->next = begin; -+ dst_next->prev = end; -+ begin->prev = dst; -+ end->next = dst_next; -+} -+ -+/* move the slice of elements from begin to end inclusive to the tail of dst */ -+static inline void list_move_slice_tail( struct list *dst, struct list *begin, struct list *end ) -+{ -+ struct list *dst_prev = dst->prev; -+ begin->prev->next = end->next; -+ end->next->prev = begin->prev; -+ dst_prev->next = begin; -+ dst->prev = end; -+ begin->prev = dst_prev; -+ 
end->next = dst; -+} -+ -+/* iterate through the list */ -+#define LIST_FOR_EACH(cursor,list) \ -+ for ((cursor) = (list)->next; (cursor) != (list); (cursor) = (cursor)->next) -+ -+/* iterate through the list, with safety against removal */ -+#define LIST_FOR_EACH_SAFE(cursor, cursor2, list) \ -+ for ((cursor) = (list)->next, (cursor2) = (cursor)->next; \ -+ (cursor) != (list); \ -+ (cursor) = (cursor2), (cursor2) = (cursor)->next) -+ -+/* iterate through the list using a list entry */ -+#define LIST_FOR_EACH_ENTRY(elem, list, type, field) \ -+ for ((elem) = LIST_ENTRY((list)->next, type, field); \ -+ &(elem)->field != (list); \ -+ (elem) = LIST_ENTRY((elem)->field.next, type, field)) -+ -+/* iterate through the list using a list entry, with safety against removal */ -+#define LIST_FOR_EACH_ENTRY_SAFE(cursor, cursor2, list, type, field) \ -+ for ((cursor) = LIST_ENTRY((list)->next, type, field), \ -+ (cursor2) = LIST_ENTRY((cursor)->field.next, type, field); \ -+ &(cursor)->field != (list); \ -+ (cursor) = (cursor2), \ -+ (cursor2) = LIST_ENTRY((cursor)->field.next, type, field)) -+ -+/* iterate through the list in reverse order */ -+#define LIST_FOR_EACH_REV(cursor,list) \ -+ for ((cursor) = (list)->prev; (cursor) != (list); (cursor) = (cursor)->prev) -+ -+/* iterate through the list in reverse order, with safety against removal */ -+#define LIST_FOR_EACH_SAFE_REV(cursor, cursor2, list) \ -+ for ((cursor) = (list)->prev, (cursor2) = (cursor)->prev; \ -+ (cursor) != (list); \ -+ (cursor) = (cursor2), (cursor2) = (cursor)->prev) -+ -+/* iterate through the list in reverse order using a list entry */ -+#define LIST_FOR_EACH_ENTRY_REV(elem, list, type, field) \ -+ for ((elem) = LIST_ENTRY((list)->prev, type, field); \ -+ &(elem)->field != (list); \ -+ (elem) = LIST_ENTRY((elem)->field.prev, type, field)) -+ -+/* iterate through the list in reverse order using a list entry, with safety against removal */ -+#define LIST_FOR_EACH_ENTRY_SAFE_REV(cursor, cursor2, list, 
type, field) \ -+ for ((cursor) = LIST_ENTRY((list)->prev, type, field), \ -+ (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field); \ -+ &(cursor)->field != (list); \ -+ (cursor) = (cursor2), \ -+ (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field)) -+ -+/* macros for statically initialized lists */ -+#undef LIST_INIT -+#define LIST_INIT(list) { &(list), &(list) } -+ -+/* get pointer to object containing list element */ -+#undef LIST_ENTRY -+#define LIST_ENTRY(elem, type, field) \ -+ ((type *)((char *)(elem) - offsetof(type, field))) -+ -+#endif /* __WINE_SERVER_LIST_H */ -diff --git a/libs/vkd3d/include/private/list.h b/libs/vkd3d/include/private/list.h -new file mode 100644 -index 00000000000..2e1d95f3fd4 ---- /dev/null -+++ b/libs/vkd3d/include/private/list.h -@@ -0,0 +1,270 @@ -+/* -+ * Linked lists support -+ * -+ * Copyright (C) 2002 Alexandre Julliard -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#ifndef __WINE_SERVER_LIST_H -+#define __WINE_SERVER_LIST_H -+ -+#include -+ -+struct list -+{ -+ struct list *next; -+ struct list *prev; -+}; -+ -+/* Define a list like so: -+ * -+ * struct gadget -+ * { -+ * struct list entry; <-- doesn't have to be the first item in the struct -+ * int a, b; -+ * }; -+ * -+ * static struct list global_gadgets = LIST_INIT( global_gadgets ); -+ * -+ * or -+ * -+ * struct some_global_thing -+ * { -+ * struct list gadgets; -+ * }; -+ * -+ * list_init( &some_global_thing->gadgets ); -+ * -+ * Manipulate it like this: -+ * -+ * list_add_head( &global_gadgets, &new_gadget->entry ); -+ * list_remove( &new_gadget->entry ); -+ * list_add_after( &some_random_gadget->entry, &new_gadget->entry ); -+ * -+ * And to iterate over it: -+ * -+ * struct gadget *gadget; -+ * LIST_FOR_EACH_ENTRY( gadget, &global_gadgets, struct gadget, entry ) -+ * { -+ * ... 
-+ * } -+ * -+ */ -+ -+/* add an element after the specified one */ -+static inline void list_add_after( struct list *elem, struct list *to_add ) -+{ -+ to_add->next = elem->next; -+ to_add->prev = elem; -+ elem->next->prev = to_add; -+ elem->next = to_add; -+} -+ -+/* add an element before the specified one */ -+static inline void list_add_before( struct list *elem, struct list *to_add ) -+{ -+ to_add->next = elem; -+ to_add->prev = elem->prev; -+ elem->prev->next = to_add; -+ elem->prev = to_add; -+} -+ -+/* add element at the head of the list */ -+static inline void list_add_head( struct list *list, struct list *elem ) -+{ -+ list_add_after( list, elem ); -+} -+ -+/* add element at the tail of the list */ -+static inline void list_add_tail( struct list *list, struct list *elem ) -+{ -+ list_add_before( list, elem ); -+} -+ -+/* remove an element from its list */ -+static inline void list_remove( struct list *elem ) -+{ -+ elem->next->prev = elem->prev; -+ elem->prev->next = elem->next; -+} -+ -+/* get the next element */ -+static inline struct list *list_next( const struct list *list, const struct list *elem ) -+{ -+ struct list *ret = elem->next; -+ if (elem->next == list) ret = NULL; -+ return ret; -+} -+ -+/* get the previous element */ -+static inline struct list *list_prev( const struct list *list, const struct list *elem ) -+{ -+ struct list *ret = elem->prev; -+ if (elem->prev == list) ret = NULL; -+ return ret; -+} -+ -+/* get the first element */ -+static inline struct list *list_head( const struct list *list ) -+{ -+ return list_next( list, list ); -+} -+ -+/* get the last element */ -+static inline struct list *list_tail( const struct list *list ) -+{ -+ return list_prev( list, list ); -+} -+ -+/* check if a list is empty */ -+static inline int list_empty( const struct list *list ) -+{ -+ return list->next == list; -+} -+ -+/* initialize a list */ -+static inline void list_init( struct list *list ) -+{ -+ list->next = list->prev = list; -+} -+ -+/* 
count the elements of a list */ -+static inline unsigned int list_count( const struct list *list ) -+{ -+ unsigned count = 0; -+ const struct list *ptr; -+ for (ptr = list->next; ptr != list; ptr = ptr->next) count++; -+ return count; -+} -+ -+/* move all elements from src to before the specified element */ -+static inline void list_move_before( struct list *dst, struct list *src ) -+{ -+ if (list_empty(src)) return; -+ -+ dst->prev->next = src->next; -+ src->next->prev = dst->prev; -+ dst->prev = src->prev; -+ src->prev->next = dst; -+ list_init(src); -+} -+ -+/* move all elements from src to after the specified element */ -+static inline void list_move_after( struct list *dst, struct list *src ) -+{ -+ if (list_empty(src)) return; -+ -+ dst->next->prev = src->prev; -+ src->prev->next = dst->next; -+ dst->next = src->next; -+ src->next->prev = dst; -+ list_init(src); -+} -+ -+/* move all elements from src to the head of dst */ -+static inline void list_move_head( struct list *dst, struct list *src ) -+{ -+ list_move_after( dst, src ); -+} -+ -+/* move all elements from src to the tail of dst */ -+static inline void list_move_tail( struct list *dst, struct list *src ) -+{ -+ list_move_before( dst, src ); -+} -+ -+/* move the slice of elements from begin to end inclusive to the head of dst */ -+static inline void list_move_slice_head( struct list *dst, struct list *begin, struct list *end ) -+{ -+ struct list *dst_next = dst->next; -+ begin->prev->next = end->next; -+ end->next->prev = begin->prev; -+ dst->next = begin; -+ dst_next->prev = end; -+ begin->prev = dst; -+ end->next = dst_next; -+} -+ -+/* move the slice of elements from begin to end inclusive to the tail of dst */ -+static inline void list_move_slice_tail( struct list *dst, struct list *begin, struct list *end ) -+{ -+ struct list *dst_prev = dst->prev; -+ begin->prev->next = end->next; -+ end->next->prev = begin->prev; -+ dst_prev->next = begin; -+ dst->prev = end; -+ begin->prev = dst_prev; -+ 
end->next = dst; -+} -+ -+/* iterate through the list */ -+#define LIST_FOR_EACH(cursor,list) \ -+ for ((cursor) = (list)->next; (cursor) != (list); (cursor) = (cursor)->next) -+ -+/* iterate through the list, with safety against removal */ -+#define LIST_FOR_EACH_SAFE(cursor, cursor2, list) \ -+ for ((cursor) = (list)->next, (cursor2) = (cursor)->next; \ -+ (cursor) != (list); \ -+ (cursor) = (cursor2), (cursor2) = (cursor)->next) -+ -+/* iterate through the list using a list entry */ -+#define LIST_FOR_EACH_ENTRY(elem, list, type, field) \ -+ for ((elem) = LIST_ENTRY((list)->next, type, field); \ -+ &(elem)->field != (list); \ -+ (elem) = LIST_ENTRY((elem)->field.next, type, field)) -+ -+/* iterate through the list using a list entry, with safety against removal */ -+#define LIST_FOR_EACH_ENTRY_SAFE(cursor, cursor2, list, type, field) \ -+ for ((cursor) = LIST_ENTRY((list)->next, type, field), \ -+ (cursor2) = LIST_ENTRY((cursor)->field.next, type, field); \ -+ &(cursor)->field != (list); \ -+ (cursor) = (cursor2), \ -+ (cursor2) = LIST_ENTRY((cursor)->field.next, type, field)) -+ -+/* iterate through the list in reverse order */ -+#define LIST_FOR_EACH_REV(cursor,list) \ -+ for ((cursor) = (list)->prev; (cursor) != (list); (cursor) = (cursor)->prev) -+ -+/* iterate through the list in reverse order, with safety against removal */ -+#define LIST_FOR_EACH_SAFE_REV(cursor, cursor2, list) \ -+ for ((cursor) = (list)->prev, (cursor2) = (cursor)->prev; \ -+ (cursor) != (list); \ -+ (cursor) = (cursor2), (cursor2) = (cursor)->prev) -+ -+/* iterate through the list in reverse order using a list entry */ -+#define LIST_FOR_EACH_ENTRY_REV(elem, list, type, field) \ -+ for ((elem) = LIST_ENTRY((list)->prev, type, field); \ -+ &(elem)->field != (list); \ -+ (elem) = LIST_ENTRY((elem)->field.prev, type, field)) -+ -+/* iterate through the list in reverse order using a list entry, with safety against removal */ -+#define LIST_FOR_EACH_ENTRY_SAFE_REV(cursor, cursor2, list, 
type, field) \ -+ for ((cursor) = LIST_ENTRY((list)->prev, type, field), \ -+ (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field); \ -+ &(cursor)->field != (list); \ -+ (cursor) = (cursor2), \ -+ (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field)) -+ -+/* macros for statically initialized lists */ -+#undef LIST_INIT -+#define LIST_INIT(list) { &(list), &(list) } -+ -+/* get pointer to object containing list element */ -+#undef LIST_ENTRY -+#define LIST_ENTRY(elem, type, field) \ -+ ((type *)((char *)(elem) - offsetof(type, field))) -+ -+#endif /* __WINE_SERVER_LIST_H */ -diff --git a/libs/vkd3d/include/private/rbtree.h b/libs/vkd3d/include/private/rbtree.h -new file mode 100644 -index 00000000000..b5d38bca54c ---- /dev/null -+++ b/libs/vkd3d/include/private/rbtree.h -@@ -0,0 +1,378 @@ -+/* -+ * Red-black search tree support -+ * -+ * Copyright 2009 Henri Verbeet -+ * Copyright 2009 Andrew Riedi -+ * Copyright 2016 Jacek Caban for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#ifndef __WINE_WINE_RBTREE_H -+#define __WINE_WINE_RBTREE_H -+ -+#define RB_ENTRY_VALUE(element, type, field) \ -+ ((type *)((char *)(element) - offsetof(type, field))) -+ -+struct rb_entry -+{ -+ struct rb_entry *parent; -+ struct rb_entry *left; -+ struct rb_entry *right; -+ unsigned int flags; -+}; -+ -+typedef int (*rb_compare_func)(const void *key, const struct rb_entry *entry); -+ -+struct rb_tree -+{ -+ rb_compare_func compare; -+ struct rb_entry *root; -+}; -+ -+typedef void (rb_traverse_func)(struct rb_entry *entry, void *context); -+ -+#define RB_FLAG_RED 0x1 -+ -+static inline int rb_is_red(struct rb_entry *entry) -+{ -+ return entry && (entry->flags & RB_FLAG_RED); -+} -+ -+static inline void rb_rotate_left(struct rb_tree *tree, struct rb_entry *e) -+{ -+ struct rb_entry *right = e->right; -+ -+ if (!e->parent) -+ tree->root = right; -+ else if (e->parent->left == e) -+ e->parent->left = right; -+ else -+ e->parent->right = right; -+ -+ e->right = right->left; -+ if (e->right) e->right->parent = e; -+ right->left = e; -+ right->parent = e->parent; -+ e->parent = right; -+} -+ -+static inline void rb_rotate_right(struct rb_tree *tree, struct rb_entry *e) -+{ -+ struct rb_entry *left = e->left; -+ -+ if (!e->parent) -+ tree->root = left; -+ else if (e->parent->left == e) -+ e->parent->left = left; -+ else -+ e->parent->right = left; -+ -+ e->left = left->right; -+ if (e->left) e->left->parent = e; -+ left->right = e; -+ left->parent = e->parent; -+ e->parent = left; -+} -+ -+static inline void rb_flip_color(struct rb_entry *entry) -+{ -+ entry->flags ^= RB_FLAG_RED; -+ entry->left->flags ^= RB_FLAG_RED; -+ entry->right->flags ^= RB_FLAG_RED; -+} -+ -+static inline struct rb_entry *rb_head(struct rb_entry *iter) -+{ -+ if 
(!iter) return NULL; -+ while (iter->left) iter = iter->left; -+ return iter; -+} -+ -+static inline struct rb_entry *rb_next(struct rb_entry *iter) -+{ -+ if (iter->right) return rb_head(iter->right); -+ while (iter->parent && iter->parent->right == iter) iter = iter->parent; -+ return iter->parent; -+} -+ -+static inline struct rb_entry *rb_postorder_head(struct rb_entry *iter) -+{ -+ if (!iter) return NULL; -+ -+ for (;;) { -+ while (iter->left) iter = iter->left; -+ if (!iter->right) return iter; -+ iter = iter->right; -+ } -+} -+ -+static inline struct rb_entry *rb_postorder_next(struct rb_entry *iter) -+{ -+ if (!iter->parent) return NULL; -+ if (iter == iter->parent->right || !iter->parent->right) return iter->parent; -+ return rb_postorder_head(iter->parent->right); -+} -+ -+/* iterate through the tree */ -+#define RB_FOR_EACH(cursor, tree) \ -+ for ((cursor) = rb_head((tree)->root); (cursor); (cursor) = rb_next(cursor)) -+ -+/* iterate through the tree using a tree entry */ -+#define RB_FOR_EACH_ENTRY(elem, tree, type, field) \ -+ for ((elem) = RB_ENTRY_VALUE(rb_head((tree)->root), type, field); \ -+ (elem) != RB_ENTRY_VALUE(0, type, field); \ -+ (elem) = RB_ENTRY_VALUE(rb_next(&elem->field), type, field)) -+ -+/* iterate through the tree using using postorder, making it safe to free the entry */ -+#define RB_FOR_EACH_DESTRUCTOR(cursor, cursor2, tree) \ -+ for ((cursor) = rb_postorder_head((tree)->root); \ -+ (cursor) && (((cursor2) = rb_postorder_next(cursor)) || 1); \ -+ (cursor) = (cursor2)) -+ -+/* iterate through the tree using a tree entry and postorder, making it safe to free the entry */ -+#define RB_FOR_EACH_ENTRY_DESTRUCTOR(elem, elem2, tree, type, field) \ -+ for ((elem) = RB_ENTRY_VALUE(rb_postorder_head((tree)->root), type, field); \ -+ (elem) != WINE_RB_ENTRY_VALUE(0, type, field) \ -+ && (((elem2) = RB_ENTRY_VALUE(rb_postorder_next(&(elem)->field), type, field)) || 1); \ -+ (elem) = (elem2)) -+ -+ -+static inline void rb_postorder(struct 
rb_tree *tree, rb_traverse_func *callback, void *context) -+{ -+ struct rb_entry *iter, *next; -+ RB_FOR_EACH_DESTRUCTOR(iter, next, tree) callback(iter, context); -+} -+ -+static inline void rb_init(struct rb_tree *tree, rb_compare_func compare) -+{ -+ tree->compare = compare; -+ tree->root = NULL; -+} -+ -+static inline void rb_for_each_entry(struct rb_tree *tree, rb_traverse_func *callback, void *context) -+{ -+ struct rb_entry *iter; -+ RB_FOR_EACH(iter, tree) callback(iter, context); -+} -+ -+static inline void rb_clear(struct rb_tree *tree, rb_traverse_func *callback, void *context) -+{ -+ /* Note that we use postorder here because the callback will likely free the entry. */ -+ if (callback) rb_postorder(tree, callback, context); -+ tree->root = NULL; -+} -+ -+static inline void rb_destroy(struct rb_tree *tree, rb_traverse_func *callback, void *context) -+{ -+ rb_clear(tree, callback, context); -+} -+ -+static inline struct rb_entry *rb_get(const struct rb_tree *tree, const void *key) -+{ -+ struct rb_entry *entry = tree->root; -+ while (entry) -+ { -+ int c = tree->compare(key, entry); -+ if (!c) return entry; -+ entry = c < 0 ? 
entry->left : entry->right; -+ } -+ return NULL; -+} -+ -+static inline int rb_put(struct rb_tree *tree, const void *key, struct rb_entry *entry) -+{ -+ struct rb_entry **iter = &tree->root, *parent = tree->root; -+ -+ while (*iter) -+ { -+ int c; -+ -+ parent = *iter; -+ c = tree->compare(key, parent); -+ if (!c) return -1; -+ else if (c < 0) iter = &parent->left; -+ else iter = &parent->right; -+ } -+ -+ entry->flags = RB_FLAG_RED; -+ entry->parent = parent; -+ entry->left = NULL; -+ entry->right = NULL; -+ *iter = entry; -+ -+ while (rb_is_red(entry->parent)) -+ { -+ if (entry->parent == entry->parent->parent->left) -+ { -+ if (rb_is_red(entry->parent->parent->right)) -+ { -+ rb_flip_color(entry->parent->parent); -+ entry = entry->parent->parent; -+ } -+ else -+ { -+ if (entry == entry->parent->right) -+ { -+ entry = entry->parent; -+ rb_rotate_left(tree, entry); -+ } -+ entry->parent->flags &= ~RB_FLAG_RED; -+ entry->parent->parent->flags |= RB_FLAG_RED; -+ rb_rotate_right(tree, entry->parent->parent); -+ } -+ } -+ else -+ { -+ if (rb_is_red(entry->parent->parent->left)) -+ { -+ rb_flip_color(entry->parent->parent); -+ entry = entry->parent->parent; -+ } -+ else -+ { -+ if (entry == entry->parent->left) -+ { -+ entry = entry->parent; -+ rb_rotate_right(tree, entry); -+ } -+ entry->parent->flags &= ~RB_FLAG_RED; -+ entry->parent->parent->flags |= RB_FLAG_RED; -+ rb_rotate_left(tree, entry->parent->parent); -+ } -+ } -+ } -+ -+ tree->root->flags &= ~RB_FLAG_RED; -+ -+ return 0; -+} -+ -+static inline void rb_remove(struct rb_tree *tree, struct rb_entry *entry) -+{ -+ struct rb_entry *iter, *child, *parent, *w; -+ int need_fixup; -+ -+ if (entry->right && entry->left) -+ for(iter = entry->right; iter->left; iter = iter->left); -+ else -+ iter = entry; -+ -+ child = iter->left ? 
iter->left : iter->right; -+ -+ if (!iter->parent) -+ tree->root = child; -+ else if (iter == iter->parent->left) -+ iter->parent->left = child; -+ else -+ iter->parent->right = child; -+ -+ if (child) child->parent = iter->parent; -+ parent = iter->parent; -+ -+ need_fixup = !rb_is_red(iter); -+ -+ if (entry != iter) -+ { -+ *iter = *entry; -+ if (!iter->parent) -+ tree->root = iter; -+ else if (entry == iter->parent->left) -+ iter->parent->left = iter; -+ else -+ iter->parent->right = iter; -+ -+ if (iter->right) iter->right->parent = iter; -+ if (iter->left) iter->left->parent = iter; -+ if (parent == entry) parent = iter; -+ } -+ -+ if (need_fixup) -+ { -+ while (parent && !rb_is_red(child)) -+ { -+ if (child == parent->left) -+ { -+ w = parent->right; -+ if (rb_is_red(w)) -+ { -+ w->flags &= ~RB_FLAG_RED; -+ parent->flags |= RB_FLAG_RED; -+ rb_rotate_left(tree, parent); -+ w = parent->right; -+ } -+ if (rb_is_red(w->left) || rb_is_red(w->right)) -+ { -+ if (!rb_is_red(w->right)) -+ { -+ w->left->flags &= ~RB_FLAG_RED; -+ w->flags |= RB_FLAG_RED; -+ rb_rotate_right(tree, w); -+ w = parent->right; -+ } -+ w->flags = (w->flags & ~RB_FLAG_RED) | (parent->flags & RB_FLAG_RED); -+ parent->flags &= ~RB_FLAG_RED; -+ if (w->right) -+ w->right->flags &= ~RB_FLAG_RED; -+ rb_rotate_left(tree, parent); -+ child = NULL; -+ break; -+ } -+ } -+ else -+ { -+ w = parent->left; -+ if (rb_is_red(w)) -+ { -+ w->flags &= ~RB_FLAG_RED; -+ parent->flags |= RB_FLAG_RED; -+ rb_rotate_right(tree, parent); -+ w = parent->left; -+ } -+ if (rb_is_red(w->left) || rb_is_red(w->right)) -+ { -+ if (!rb_is_red(w->left)) -+ { -+ w->right->flags &= ~RB_FLAG_RED; -+ w->flags |= RB_FLAG_RED; -+ rb_rotate_left(tree, w); -+ w = parent->left; -+ } -+ w->flags = (w->flags & ~RB_FLAG_RED) | (parent->flags & RB_FLAG_RED); -+ parent->flags &= ~RB_FLAG_RED; -+ if (w->left) -+ w->left->flags &= ~RB_FLAG_RED; -+ rb_rotate_right(tree, parent); -+ child = NULL; -+ break; -+ } -+ } -+ w->flags |= RB_FLAG_RED; 
-+ child = parent; -+ parent = child->parent; -+ } -+ if (child) child->flags &= ~RB_FLAG_RED; -+ } -+ -+ if (tree->root) tree->root->flags &= ~RB_FLAG_RED; -+} -+ -+static inline void rb_remove_key(struct rb_tree *tree, const void *key) -+{ -+ struct rb_entry *entry = rb_get(tree, key); -+ if (entry) rb_remove(tree, entry); -+} -+ -+#endif /* __WINE_WINE_RBTREE_H */ -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index 3cf0422596c..f7d98f327f1 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -20,6 +20,7 @@ - #define __VKD3D_COMMON_H - - #include "config.h" -+#define WIN32_LEAN_AND_MEAN - #include "windows.h" - #include "vkd3d_types.h" - -@@ -28,6 +29,7 @@ - #include - #include - #include -+#include - - #ifdef _MSC_VER - #include -@@ -84,7 +86,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) - { - #ifdef _MSC_VER - return __popcnt(v); --#elif defined(__MINGW32__) -+#elif defined(HAVE_BUILTIN_POPCOUNT) - return __builtin_popcount(v); - #else - v -= (v >> 1) & 0x55555555; -@@ -171,6 +173,11 @@ static inline bool vkd3d_bound_range(size_t start, size_t count, size_t limit) - #endif - } - -+static inline bool vkd3d_object_range_overflow(size_t start, size_t count, size_t size) -+{ -+ return (~(size_t)0 - start) / size < count; -+} -+ - static inline uint16_t vkd3d_make_u16(uint8_t low, uint8_t high) - { - return low | ((uint16_t)high << 8); -@@ -186,6 +193,21 @@ static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) - return (x > y) - (x < y); - } - -+static inline bool bitmap_clear(uint32_t *map, unsigned int idx) -+{ -+ return map[idx >> 5] &= ~(1u << (idx & 0x1f)); -+} -+ -+static inline bool bitmap_set(uint32_t *map, unsigned int idx) -+{ -+ return map[idx >> 5] |= (1u << (idx & 0x1f)); -+} -+ -+static inline bool bitmap_is_set(const uint32_t *map, unsigned int idx) -+{ -+ return map[idx >> 5] & (1u << (idx & 0x1f)); -+} -+ - static 
inline int ascii_isupper(int c) - { - return 'A' <= c && c <= 'Z'; -@@ -249,6 +271,7 @@ static inline LONG InterlockedDecrement(LONG volatile *x) - # else - # error "InterlockedDecrement() not implemented for this platform" - # endif -+ - #endif /* _WIN32 */ - - static inline void vkd3d_parse_version(const char *version, int *major, int *minor) -diff --git a/libs/vkd3d/include/private/vkd3d_debug.h b/libs/vkd3d/include/private/vkd3d_debug.h -index 4f6d43af12f..6708cad344f 100644 ---- a/libs/vkd3d/include/private/vkd3d_debug.h -+++ b/libs/vkd3d/include/private/vkd3d_debug.h -@@ -91,7 +91,7 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); - - #define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) - --#define VKD3D_DEBUG_ENV_NAME(name) const char *vkd3d_dbg_env_name = name -+#define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name - - static inline const char *debugstr_guid(const GUID *guid) - { -diff --git a/libs/vkd3d/include/private/vkd3d_shader_utils.h b/libs/vkd3d/include/private/vkd3d_shader_utils.h -new file mode 100644 -index 00000000000..c9f8001e590 ---- /dev/null -+++ b/libs/vkd3d/include/private/vkd3d_shader_utils.h -@@ -0,0 +1,67 @@ -+/* -+ * Copyright 2023 Conor McCarthy for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#ifndef __VKD3D_SHADER_UTILS_H -+#define __VKD3D_SHADER_UTILS_H -+ -+#include "vkd3d_shader.h" -+ -+#define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') -+#define TAG_SHDR VKD3D_MAKE_TAG('S', 'H', 'D', 'R') -+#define TAG_SHEX VKD3D_MAKE_TAG('S', 'H', 'E', 'X') -+ -+static inline enum vkd3d_result vkd3d_shader_parse_dxbc_source_type(const struct vkd3d_shader_code *dxbc, -+ enum vkd3d_shader_source_type *type, char **messages) -+{ -+ struct vkd3d_shader_dxbc_desc desc; -+ enum vkd3d_result ret; -+ unsigned int i; -+ -+ *type = VKD3D_SHADER_SOURCE_NONE; -+ -+ if ((ret = vkd3d_shader_parse_dxbc(dxbc, 0, &desc, messages)) < 0) -+ return ret; -+ -+ for (i = 0; i < desc.section_count; ++i) -+ { -+ uint32_t tag = desc.sections[i].tag; -+ if (tag == TAG_SHDR || tag == TAG_SHEX) -+ { -+ *type = VKD3D_SHADER_SOURCE_DXBC_TPF; -+#ifndef VKD3D_SHADER_UNSUPPORTED_DXIL -+ break; -+#else -+ } -+ else if (tag == TAG_DXIL) -+ { -+ *type = VKD3D_SHADER_SOURCE_DXBC_DXIL; -+ /* Default to DXIL if both are present. 
*/ -+ break; -+#endif -+ } -+ } -+ -+ vkd3d_shader_free_dxbc(&desc); -+ -+ if (*type == VKD3D_SHADER_SOURCE_NONE) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ return VKD3D_OK; -+} -+ -+#endif /* __VKD3D_SHADER_UTILS_H */ -diff --git a/libs/vkd3d/include/private/vkd3d_test.h b/libs/vkd3d/include/private/vkd3d_test.h -new file mode 100644 -index 00000000000..081443c4fa6 ---- /dev/null -+++ b/libs/vkd3d/include/private/vkd3d_test.h -@@ -0,0 +1,432 @@ -+/* -+ * Copyright 2016 Józef Kucia for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#ifndef __VKD3D_TEST_H -+#define __VKD3D_TEST_H -+ -+#include "vkd3d_common.h" -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+extern const char *vkd3d_test_name; -+extern const char *vkd3d_test_platform; -+ -+static void vkd3d_test_start_todo(bool is_todo); -+static int vkd3d_test_loop_todo(void); -+static void vkd3d_test_end_todo(void); -+ -+#define START_TEST(name) \ -+ const char *vkd3d_test_name = #name; \ -+ static void vkd3d_test_main(int argc, char **argv) -+ -+/* -+ * Use assert_that() for conditions that should always be true. -+ * todo_if() and bug_if() do not influence assert_that(). 
-+ */ -+#define assert_that assert_that_(__LINE__) -+ -+#define ok ok_(__LINE__) -+ -+#define skip skip_(__LINE__) -+ -+#define trace trace_(__LINE__) -+ -+#define assert_that_(line) \ -+ do { \ -+ unsigned int vkd3d_line = line; \ -+ VKD3D_TEST_ASSERT_THAT -+ -+#define VKD3D_TEST_ASSERT_THAT(...) \ -+ vkd3d_test_assert_that(vkd3d_line, __VA_ARGS__); } while (0) -+ -+#define ok_(line) \ -+ do { \ -+ unsigned int vkd3d_line = line; \ -+ VKD3D_TEST_OK -+ -+#define VKD3D_TEST_OK(...) \ -+ vkd3d_test_ok(vkd3d_line, __VA_ARGS__); } while (0) -+ -+#define todo_(line) \ -+ do { \ -+ unsigned int vkd3d_line = line; \ -+ VKD3D_TEST_TODO -+ -+#define VKD3D_TEST_TODO(...) \ -+ vkd3d_test_todo(vkd3d_line, __VA_ARGS__); } while (0) -+ -+#define skip_(line) \ -+ do { \ -+ unsigned int vkd3d_line = line; \ -+ VKD3D_TEST_SKIP -+ -+#define VKD3D_TEST_SKIP(...) \ -+ vkd3d_test_skip(vkd3d_line, __VA_ARGS__); } while (0) -+ -+#define trace_(line) \ -+ do { \ -+ unsigned int vkd3d_line = line; \ -+ VKD3D_TEST_TRACE -+ -+#define VKD3D_TEST_TRACE(...) 
\ -+ vkd3d_test_trace(vkd3d_line, __VA_ARGS__); } while (0) -+ -+#define todo_if(is_todo) \ -+ for (vkd3d_test_start_todo(is_todo); vkd3d_test_loop_todo(); vkd3d_test_end_todo()) -+ -+#define bug_if(is_bug) \ -+ for (vkd3d_test_start_bug(is_bug); vkd3d_test_loop_bug(); vkd3d_test_end_bug()) -+ -+#define todo todo_if(true) -+ -+struct vkd3d_test_state -+{ -+ LONG success_count; -+ LONG failure_count; -+ LONG skip_count; -+ LONG todo_count; -+ LONG todo_success_count; -+ LONG bug_count; -+ -+ unsigned int debug_level; -+ -+ unsigned int todo_level; -+ bool todo_do_loop; -+ -+ unsigned int bug_level; -+ bool bug_do_loop; -+ bool bug_enabled; -+ -+ const char *test_name_filter; -+ char context[8][128]; -+ unsigned int context_count; -+}; -+extern struct vkd3d_test_state vkd3d_test_state; -+ -+static bool -+vkd3d_test_platform_is_windows(void) -+{ -+ return !strcmp(vkd3d_test_platform, "windows"); -+} -+ -+static inline bool -+broken(bool condition) -+{ -+ return condition && vkd3d_test_platform_is_windows(); -+} -+ -+static void vkd3d_test_printf(unsigned int line, const char *msg) -+{ -+ unsigned int i; -+ -+ printf("%s:%u: ", vkd3d_test_name, line); -+ for (i = 0; i < vkd3d_test_state.context_count; ++i) -+ printf("%s: ", vkd3d_test_state.context[i]); -+ printf("%s", msg); -+} -+ -+static void -+vkd3d_test_check_assert_that(unsigned int line, bool result, const char *fmt, va_list args) -+{ -+ if (result) -+ { -+ InterlockedIncrement(&vkd3d_test_state.success_count); -+ if (vkd3d_test_state.debug_level > 1) -+ vkd3d_test_printf(line, "Test succeeded.\n"); -+ } -+ else -+ { -+ InterlockedIncrement(&vkd3d_test_state.failure_count); -+ vkd3d_test_printf(line, "Test failed: "); -+ vprintf(fmt, args); -+ } -+} -+ -+static void VKD3D_PRINTF_FUNC(3, 4) VKD3D_UNUSED -+vkd3d_test_assert_that(unsigned int line, bool result, const char *fmt, ...) 
-+{ -+ va_list args; -+ -+ va_start(args, fmt); -+ vkd3d_test_check_assert_that(line, result, fmt, args); -+ va_end(args); -+} -+ -+static void -+vkd3d_test_check_ok(unsigned int line, bool result, const char *fmt, va_list args) -+{ -+ bool is_todo = vkd3d_test_state.todo_level && !vkd3d_test_platform_is_windows(); -+ bool is_bug = vkd3d_test_state.bug_level && !vkd3d_test_platform_is_windows(); -+ -+ if (is_bug && vkd3d_test_state.bug_enabled) -+ { -+ InterlockedIncrement(&vkd3d_test_state.bug_count); -+ if (is_todo) -+ result = !result; -+ if (result) -+ vkd3d_test_printf(line, "Fixed bug: "); -+ else -+ vkd3d_test_printf(line, "Bug: "); -+ vprintf(fmt, args); -+ } -+ else if (is_todo) -+ { -+ if (result) -+ { -+ InterlockedIncrement(&vkd3d_test_state.todo_success_count); -+ vkd3d_test_printf(line, "Todo succeeded: "); -+ } -+ else -+ { -+ InterlockedIncrement(&vkd3d_test_state.todo_count); -+ vkd3d_test_printf(line, "Todo: "); -+ } -+ vprintf(fmt, args); -+ } -+ else -+ { -+ vkd3d_test_check_assert_that(line, result, fmt, args); -+ } -+} -+ -+static void VKD3D_PRINTF_FUNC(3, 4) VKD3D_UNUSED -+vkd3d_test_ok(unsigned int line, bool result, const char *fmt, ...) -+{ -+ va_list args; -+ -+ va_start(args, fmt); -+ vkd3d_test_check_ok(line, result, fmt, args); -+ va_end(args); -+} -+ -+static void VKD3D_PRINTF_FUNC(2, 3) VKD3D_UNUSED -+vkd3d_test_skip(unsigned int line, const char *fmt, ...) -+{ -+ va_list args; -+ va_start(args, fmt); -+ vkd3d_test_printf(line, "Test skipped: "); -+ vprintf(fmt, args); -+ va_end(args); -+ InterlockedIncrement(&vkd3d_test_state.skip_count); -+} -+ -+static void VKD3D_PRINTF_FUNC(2, 3) VKD3D_UNUSED -+vkd3d_test_trace(unsigned int line, const char *fmt, ...) -+{ -+ va_list args; -+ va_start(args, fmt); -+ vkd3d_test_printf(line, ""); -+ vprintf(fmt, args); -+ va_end(args); -+} -+ -+static void VKD3D_PRINTF_FUNC(1, 2) VKD3D_UNUSED -+vkd3d_test_debug(const char *fmt, ...) 
-+{ -+ char buffer[512]; -+ va_list args; -+ int size; -+ -+ size = snprintf(buffer, sizeof(buffer), "%s: ", vkd3d_test_name); -+ if (0 < size && size < sizeof(buffer)) -+ { -+ va_start(args, fmt); -+ vsnprintf(buffer + size, sizeof(buffer) - size, fmt, args); -+ va_end(args); -+ } -+ buffer[sizeof(buffer) - 1] = '\0'; -+ -+#ifdef _WIN32 -+ OutputDebugStringA(buffer); -+#endif -+ -+ if (vkd3d_test_state.debug_level > 0) -+ printf("%s\n", buffer); -+} -+ -+#ifndef VKD3D_TEST_NO_DEFS -+const char *vkd3d_test_platform = "other"; -+struct vkd3d_test_state vkd3d_test_state; -+ -+static void vkd3d_test_main(int argc, char **argv); -+ -+int main(int argc, char **argv) -+{ -+ const char *test_filter = getenv("VKD3D_TEST_FILTER"); -+ const char *debug_level = getenv("VKD3D_TEST_DEBUG"); -+ char *test_platform = getenv("VKD3D_TEST_PLATFORM"); -+ const char *bug = getenv("VKD3D_TEST_BUG"); -+ -+ memset(&vkd3d_test_state, 0, sizeof(vkd3d_test_state)); -+ vkd3d_test_state.debug_level = debug_level ? atoi(debug_level) : 0; -+ vkd3d_test_state.bug_enabled = bug ? 
atoi(bug) : true; -+ vkd3d_test_state.test_name_filter = test_filter; -+ -+ if (test_platform) -+ { -+ test_platform = strdup(test_platform); -+ vkd3d_test_platform = test_platform; -+ } -+ -+ if (vkd3d_test_state.debug_level > 1) -+ printf("Test platform: '%s'.\n", vkd3d_test_platform); -+ -+ vkd3d_test_main(argc, argv); -+ -+ printf("%s: %lu tests executed (%lu failures, %lu skipped, %lu todo, %lu bugs).\n", -+ vkd3d_test_name, -+ (unsigned long)(vkd3d_test_state.success_count -+ + vkd3d_test_state.failure_count + vkd3d_test_state.todo_count -+ + vkd3d_test_state.todo_success_count), -+ (unsigned long)(vkd3d_test_state.failure_count -+ + vkd3d_test_state.todo_success_count), -+ (unsigned long)vkd3d_test_state.skip_count, -+ (unsigned long)vkd3d_test_state.todo_count, -+ (unsigned long)vkd3d_test_state.bug_count); -+ -+ if (test_platform) -+ free(test_platform); -+ -+ return vkd3d_test_state.failure_count || vkd3d_test_state.todo_success_count; -+} -+ -+#ifdef _WIN32 -+static char *vkd3d_test_strdupWtoA(WCHAR *str) -+{ -+ char *out; -+ int len; -+ -+ if (!(len = WideCharToMultiByte(CP_ACP, 0, str, -1, NULL, 0, NULL, NULL))) -+ return NULL; -+ if (!(out = malloc(len))) -+ return NULL; -+ WideCharToMultiByte(CP_ACP, 0, str, -1, out, len, NULL, NULL); -+ -+ return out; -+} -+ -+static bool running_under_wine(void) -+{ -+ HMODULE module = GetModuleHandleA("ntdll.dll"); -+ return module && GetProcAddress(module, "wine_server_call"); -+} -+ -+int wmain(int argc, WCHAR **wargv) -+{ -+ char **argv; -+ int i, ret; -+ -+ argv = malloc(argc * sizeof(*argv)); -+ assert(argv); -+ for (i = 0; i < argc; ++i) -+ { -+ if (!(argv[i] = vkd3d_test_strdupWtoA(wargv[i]))) -+ break; -+ } -+ assert(i == argc); -+ -+ vkd3d_test_platform = running_under_wine() ? 
"wine" : "windows"; -+ -+ ret = main(argc, argv); -+ -+ for (i = 0; i < argc; ++i) -+ free(argv[i]); -+ free(argv); -+ -+ return ret; -+} -+#endif /* _WIN32 */ -+#endif /* VKD3D_TEST_NO_DEFS */ -+ -+typedef void (*vkd3d_test_pfn)(void); -+ -+static inline void vkd3d_run_test(const char *name, vkd3d_test_pfn test_pfn) -+{ -+ if (vkd3d_test_state.test_name_filter && !strstr(name, vkd3d_test_state.test_name_filter)) -+ return; -+ -+ vkd3d_test_debug("%s", name); -+ test_pfn(); -+} -+ -+static inline void vkd3d_test_start_todo(bool is_todo) -+{ -+ vkd3d_test_state.todo_level = (vkd3d_test_state.todo_level << 1) | is_todo; -+ vkd3d_test_state.todo_do_loop = true; -+} -+ -+static inline int vkd3d_test_loop_todo(void) -+{ -+ bool do_loop = vkd3d_test_state.todo_do_loop; -+ vkd3d_test_state.todo_do_loop = false; -+ return do_loop; -+} -+ -+static inline void vkd3d_test_end_todo(void) -+{ -+ vkd3d_test_state.todo_level >>= 1; -+} -+ -+static inline void vkd3d_test_start_bug(bool is_bug) -+{ -+ vkd3d_test_state.bug_level = (vkd3d_test_state.bug_level << 1) | is_bug; -+ vkd3d_test_state.bug_do_loop = true; -+} -+ -+static inline int vkd3d_test_loop_bug(void) -+{ -+ bool do_loop = vkd3d_test_state.bug_do_loop; -+ vkd3d_test_state.bug_do_loop = false; -+ return do_loop; -+} -+ -+static inline void vkd3d_test_end_bug(void) -+{ -+ vkd3d_test_state.bug_level >>= 1; -+} -+ -+static inline void vkd3d_test_push_context(const char *fmt, ...) 
-+{ -+ va_list args; -+ -+ if (vkd3d_test_state.context_count < ARRAY_SIZE(vkd3d_test_state.context)) -+ { -+ va_start(args, fmt); -+ vsnprintf(vkd3d_test_state.context[vkd3d_test_state.context_count], -+ sizeof(vkd3d_test_state.context), fmt, args); -+ va_end(args); -+ vkd3d_test_state.context[vkd3d_test_state.context_count][sizeof(vkd3d_test_state.context[0]) - 1] = '\0'; -+ } -+ ++vkd3d_test_state.context_count; -+} -+ -+static inline void vkd3d_test_pop_context(void) -+{ -+ if (vkd3d_test_state.context_count) -+ --vkd3d_test_state.context_count; -+} -+ -+#define run_test(test_pfn) \ -+ vkd3d_run_test(#test_pfn, test_pfn) -+ -+#endif /* __VKD3D_TEST_H */ -diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h -index ff2b15c51dc..0ddba9cc0a1 100644 ---- a/libs/vkd3d/include/vkd3d.h -+++ b/libs/vkd3d/include/vkd3d.h -@@ -76,6 +76,8 @@ enum vkd3d_api_version - VKD3D_API_VERSION_1_5, - VKD3D_API_VERSION_1_6, - VKD3D_API_VERSION_1_7, -+ VKD3D_API_VERSION_1_8, -+ VKD3D_API_VERSION_1_9, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION), - }; -@@ -206,7 +208,42 @@ VKD3D_API VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device); - VKD3D_API struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device); - - VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue); -+ -+/** -+ * Acquire the Vulkan queue backing a command queue. -+ * -+ * While a queue is acquired by the client, it is locked so that -+ * neither the vkd3d library nor other threads can submit work to -+ * it. For that reason it should be released as soon as possible with -+ * vkd3d_release_vk_queue(). The lock is not reentrant, so the same -+ * queue must not be acquired more than once by the same thread. 
-+ * -+ * Work submitted through the Direct3D 12 API exposed by vkd3d is not -+ * always immediately submitted to the Vulkan queue; sometimes it is -+ * kept in another internal queue, which might not necessarily be -+ * empty at the time vkd3d_acquire_vk_queue() is called. For this -+ * reason, work submitted directly to the Vulkan queue might appear to -+ * the Vulkan driver as being submitted before other work submitted -+ * though the Direct3D 12 API. If this is not desired, it is -+ * recommended to synchronize work submission using an ID3D12Fence -+ * object, by submitting to the queue a signal operation after all the -+ * Direct3D 12 work is submitted and waiting for it before calling -+ * vkd3d_acquire_vk_queue(). -+ * -+ * \since 1.0 -+ */ - VKD3D_API VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue); -+ -+/** -+ * Release the Vulkan queue backing a command queue. -+ * -+ * This must be paired to an earlier corresponding -+ * vkd3d_acquire_vk_queue(). After this function is called, the Vulkan -+ * queue returned by vkd3d_acquire_vk_queue() must not be used any -+ * more. -+ * -+ * \since 1.0 -+ */ - VKD3D_API void vkd3d_release_vk_queue(ID3D12CommandQueue *queue); - - VKD3D_API HRESULT vkd3d_create_image_resource(ID3D12Device *device, -diff --git a/libs/vkd3d/include/vkd3d_d3d9types.h b/libs/vkd3d/include/vkd3d_d3d9types.h -new file mode 100644 -index 00000000000..75d0461409d ---- /dev/null -+++ b/libs/vkd3d/include/vkd3d_d3d9types.h -@@ -0,0 +1,237 @@ -+/* -+ * Copyright 2002-2003 Jason Edmeades -+ * Copyright 2002-2003 Raphael Junqueira -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. 
-+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#ifndef __VKD3D_D3D9TYPES_H -+#define __VKD3D_D3D9TYPES_H -+#ifndef _d3d9TYPES_H_ -+ -+#ifndef MAKEFOURCC -+#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ -+ ((DWORD)(BYTE)(ch0) | ((DWORD)(BYTE)(ch1) << 8) | \ -+ ((DWORD)(BYTE)(ch2) << 16) | ((DWORD)(BYTE)(ch3) << 24 )) -+#endif -+ -+#define D3DSI_INSTLENGTH_SHIFT 24 -+ -+#define D3DSP_DCL_USAGE_SHIFT 0 -+#define D3DSP_DCL_USAGEINDEX_SHIFT 16 -+#define D3DSP_DSTMOD_SHIFT 20 -+ -+#define D3DSP_SRCMOD_SHIFT 24 -+ -+#define D3DSP_REGTYPE_SHIFT 28 -+#define D3DSP_REGTYPE_SHIFT2 8 -+#define D3DSP_REGTYPE_MASK (0x7 << D3DSP_REGTYPE_SHIFT) -+#define D3DSP_REGTYPE_MASK2 0x00001800 -+ -+#define D3DSP_WRITEMASK_0 0x00010000 -+#define D3DSP_WRITEMASK_1 0x00020000 -+#define D3DSP_WRITEMASK_2 0x00040000 -+#define D3DSP_WRITEMASK_3 0x00080000 -+#define D3DSP_WRITEMASK_ALL 0x000f0000 -+ -+#define D3DPS_VERSION(major, minor) (0xffff0000 | ((major) << 8) | (minor)) -+#define D3DVS_VERSION(major, minor) (0xfffe0000 | ((major) << 8) | (minor)) -+ -+typedef enum _D3DDECLUSAGE -+{ -+ D3DDECLUSAGE_POSITION = 0x0, -+ D3DDECLUSAGE_BLENDWEIGHT = 0x1, -+ D3DDECLUSAGE_BLENDINDICES = 0x2, -+ D3DDECLUSAGE_NORMAL = 0x3, -+ D3DDECLUSAGE_PSIZE = 0x4, -+ D3DDECLUSAGE_TEXCOORD = 0x5, -+ D3DDECLUSAGE_TANGENT = 0x6, -+ D3DDECLUSAGE_BINORMAL = 0x7, -+ D3DDECLUSAGE_TESSFACTOR = 0x8, -+ D3DDECLUSAGE_POSITIONT = 0x9, -+ D3DDECLUSAGE_COLOR = 0xa, -+ D3DDECLUSAGE_FOG = 0xb, -+ D3DDECLUSAGE_DEPTH = 0xc, -+ D3DDECLUSAGE_SAMPLE = 0xd, -+} D3DDECLUSAGE; -+ -+typedef enum 
_D3DSHADER_INSTRUCTION_OPCODE_TYPE -+{ -+ D3DSIO_NOP = 0x00, -+ D3DSIO_MOV = 0x01, -+ D3DSIO_ADD = 0x02, -+ D3DSIO_SUB = 0x03, -+ D3DSIO_MAD = 0x04, -+ D3DSIO_MUL = 0x05, -+ D3DSIO_RCP = 0x06, -+ D3DSIO_RSQ = 0x07, -+ D3DSIO_DP3 = 0x08, -+ D3DSIO_DP4 = 0x09, -+ D3DSIO_MIN = 0x0a, -+ D3DSIO_MAX = 0x0b, -+ D3DSIO_SLT = 0x0c, -+ D3DSIO_SGE = 0x0d, -+ D3DSIO_EXP = 0x0e, -+ D3DSIO_LOG = 0x0f, -+ D3DSIO_LIT = 0x10, -+ D3DSIO_DST = 0x11, -+ D3DSIO_LRP = 0x12, -+ D3DSIO_FRC = 0x13, -+ D3DSIO_M4x4 = 0x14, -+ D3DSIO_M4x3 = 0x15, -+ D3DSIO_M3x4 = 0x16, -+ D3DSIO_M3x3 = 0x17, -+ D3DSIO_M3x2 = 0x18, -+ D3DSIO_CALL = 0x19, -+ D3DSIO_CALLNZ = 0x1a, -+ D3DSIO_LOOP = 0x1b, -+ D3DSIO_RET = 0x1c, -+ D3DSIO_ENDLOOP = 0x1d, -+ D3DSIO_LABEL = 0x1e, -+ D3DSIO_DCL = 0x1f, -+ D3DSIO_POW = 0x20, -+ D3DSIO_CRS = 0x21, -+ D3DSIO_SGN = 0x22, -+ D3DSIO_ABS = 0x23, -+ D3DSIO_NRM = 0x24, -+ D3DSIO_SINCOS = 0x25, -+ D3DSIO_REP = 0x26, -+ D3DSIO_ENDREP = 0x27, -+ D3DSIO_IF = 0x28, -+ D3DSIO_IFC = 0x29, -+ D3DSIO_ELSE = 0x2a, -+ D3DSIO_ENDIF = 0x2b, -+ D3DSIO_BREAK = 0x2c, -+ D3DSIO_BREAKC = 0x2d, -+ D3DSIO_MOVA = 0x2e, -+ D3DSIO_DEFB = 0x2f, -+ D3DSIO_DEFI = 0x30, -+ -+ D3DSIO_TEXCOORD = 0x40, -+ D3DSIO_TEXKILL = 0x41, -+ D3DSIO_TEX = 0x42, -+ D3DSIO_TEXBEM = 0x43, -+ D3DSIO_TEXBEML = 0x44, -+ D3DSIO_TEXREG2AR = 0x45, -+ D3DSIO_TEXREG2GB = 0x46, -+ D3DSIO_TEXM3x2PAD = 0x47, -+ D3DSIO_TEXM3x2TEX = 0x48, -+ D3DSIO_TEXM3x3PAD = 0x49, -+ D3DSIO_TEXM3x3TEX = 0x4a, -+ D3DSIO_TEXM3x3DIFF = 0x4b, -+ D3DSIO_TEXM3x3SPEC = 0x4c, -+ D3DSIO_TEXM3x3VSPEC = 0x4d, -+ D3DSIO_EXPP = 0x4e, -+ D3DSIO_LOGP = 0x4f, -+ D3DSIO_CND = 0x50, -+ D3DSIO_DEF = 0x51, -+ D3DSIO_TEXREG2RGB = 0x52, -+ D3DSIO_TEXDP3TEX = 0x53, -+ D3DSIO_TEXM3x2DEPTH = 0x54, -+ D3DSIO_TEXDP3 = 0x55, -+ D3DSIO_TEXM3x3 = 0x56, -+ D3DSIO_TEXDEPTH = 0x57, -+ D3DSIO_CMP = 0x58, -+ D3DSIO_BEM = 0x59, -+ D3DSIO_DP2ADD = 0x5a, -+ D3DSIO_DSX = 0x5b, -+ D3DSIO_DSY = 0x5c, -+ D3DSIO_TEXLDD = 0x5d, -+ D3DSIO_SETP = 0x5e, -+ D3DSIO_TEXLDL = 0x5f, -+ D3DSIO_BREAKP 
= 0x60, -+ -+ D3DSIO_PHASE = 0xfffd, -+ D3DSIO_COMMENT = 0xfffe, -+ D3DSIO_END = 0xffff, -+ -+ D3DSIO_FORCE_DWORD = 0x7fffffff, -+} D3DSHADER_INSTRUCTION_OPCODE_TYPE; -+ -+typedef enum _D3DSHADER_PARAM_DSTMOD_TYPE -+{ -+ D3DSPDM_NONE = 0 << D3DSP_DSTMOD_SHIFT, -+ D3DSPDM_SATURATE = 1 << D3DSP_DSTMOD_SHIFT, -+ D3DSPDM_PARTIALPRECISION = 2 << D3DSP_DSTMOD_SHIFT, -+ D3DSPDM_MSAMPCENTROID = 4 << D3DSP_DSTMOD_SHIFT, -+ -+ D3DSPDM_FORCE_DWORD = 0x7fffffff, -+} D3DSHADER_PARAM_DSTMOD_TYPE; -+ -+typedef enum _D3DSHADER_PARAM_REGISTER_TYPE -+{ -+ D3DSPR_TEMP = 0x00, -+ D3DSPR_INPUT = 0x01, -+ D3DSPR_CONST = 0x02, -+ D3DSPR_ADDR = 0x03, -+ D3DSPR_TEXTURE = 0x03, -+ D3DSPR_RASTOUT = 0x04, -+ D3DSPR_ATTROUT = 0x05, -+ D3DSPR_TEXCRDOUT = 0x06, -+ D3DSPR_OUTPUT = 0x06, -+ D3DSPR_CONSTINT = 0x07, -+ D3DSPR_COLOROUT = 0x08, -+ D3DSPR_DEPTHOUT = 0x09, -+ D3DSPR_SAMPLER = 0x0a, -+ D3DSPR_CONST2 = 0x0b, -+ D3DSPR_CONST3 = 0x0c, -+ D3DSPR_CONST4 = 0x0d, -+ D3DSPR_CONSTBOOL = 0x0e, -+ D3DSPR_LOOP = 0x0f, -+ D3DSPR_TEMPFLOAT16 = 0x10, -+ D3DSPR_MISCTYPE = 0x11, -+ D3DSPR_LABEL = 0x12, -+ D3DSPR_PREDICATE = 0x13, -+ -+ D3DSPR_FORCE_DWORD = 0x7fffffff, -+} D3DSHADER_PARAM_REGISTER_TYPE; -+ -+typedef enum _D3DSHADER_PARAM_SRCMOD_TYPE -+{ -+ D3DSPSM_NONE = 0x0 << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_NEG = 0x1 << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_BIAS = 0x2 << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_BIASNEG = 0x3 << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_SIGN = 0x4 << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_SIGNNEG = 0x5 << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_COMP = 0x6 << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_X2 = 0x7 << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_X2NEG = 0x8 << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_DZ = 0x9 << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_DW = 0xa << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_ABS = 0xb << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_ABSNEG = 0xc << D3DSP_SRCMOD_SHIFT, -+ D3DSPSM_NOT = 0xd << D3DSP_SRCMOD_SHIFT, -+ -+ D3DSPSM_FORCE_DWORD = 0x7fffffff, -+} D3DSHADER_PARAM_SRCMOD_TYPE; -+ -+typedef enum _D3DSHADER_MISCTYPE_OFFSETS -+{ -+ 
D3DSMO_POSITION = 0x0, -+ D3DSMO_FACE = 0x1, -+} D3DSHADER_MISCTYPE_OFFSETS; -+ -+typedef enum _D3DVS_RASTOUT_OFFSETS -+{ -+ D3DSRO_POSITION = 0x0, -+ D3DSRO_FOG = 0x1, -+ D3DSRO_POINT_SIZE = 0x2, -+ -+ D3DSRO_FORCE_DWORD = 0x7fffffff, -+} D3DVS_RASTOUT_OFFSETS; -+ -+#endif /* _d3d9TYPES_H_ */ -+#endif /* __VKD3D_D3D9TYPES_H */ -diff --git a/libs/vkd3d/include/vkd3d_d3dcompiler.h b/libs/vkd3d/include/vkd3d_d3dcompiler.h -new file mode 100644 -index 00000000000..c934835dc0a ---- /dev/null -+++ b/libs/vkd3d/include/vkd3d_d3dcompiler.h -@@ -0,0 +1,74 @@ -+/* -+ * Copyright 2010 Matteo Bruni for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#ifndef __VKD3D_D3DCOMPILER_H -+#define __VKD3D_D3DCOMPILER_H -+#ifndef __D3DCOMPILER_H__ -+ -+#define D3DCOMPILE_DEBUG 0x00000001 -+#define D3DCOMPILE_SKIP_VALIDATION 0x00000002 -+#define D3DCOMPILE_SKIP_OPTIMIZATION 0x00000004 -+#define D3DCOMPILE_PACK_MATRIX_ROW_MAJOR 0x00000008 -+#define D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR 0x00000010 -+#define D3DCOMPILE_PARTIAL_PRECISION 0x00000020 -+#define D3DCOMPILE_FORCE_VS_SOFTWARE_NO_OPT 0x00000040 -+#define D3DCOMPILE_FORCE_PS_SOFTWARE_NO_OPT 0x00000080 -+#define D3DCOMPILE_NO_PRESHADER 0x00000100 -+#define D3DCOMPILE_AVOID_FLOW_CONTROL 0x00000200 -+#define D3DCOMPILE_PREFER_FLOW_CONTROL 0x00000400 -+#define D3DCOMPILE_ENABLE_STRICTNESS 0x00000800 -+#define D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY 0x00001000 -+#define D3DCOMPILE_IEEE_STRICTNESS 0x00002000 -+#define D3DCOMPILE_OPTIMIZATION_LEVEL0 0x00004000 -+#define D3DCOMPILE_OPTIMIZATION_LEVEL1 0x00000000 -+#define D3DCOMPILE_OPTIMIZATION_LEVEL2 0x0000c000 -+#define D3DCOMPILE_OPTIMIZATION_LEVEL3 0x00008000 -+#define D3DCOMPILE_RESERVED16 0x00010000 -+#define D3DCOMPILE_RESERVED17 0x00020000 -+#define D3DCOMPILE_WARNINGS_ARE_ERRORS 0x00040000 -+#define D3DCOMPILE_RESOURCES_MAY_ALIAS 0x00080000 -+#define D3DCOMPILE_ENABLE_UNBOUNDED_DESCRIPTOR_TABLES 0x00100000 -+#define D3DCOMPILE_ALL_RESOURCES_BOUND 0x00200000 -+#define D3DCOMPILE_DEBUG_NAME_FOR_SOURCE 0x00400000 -+#define D3DCOMPILE_DEBUG_NAME_FOR_BINARY 0x00800000 -+ -+#define D3DCOMPILE_EFFECT_CHILD_EFFECT 0x00000001 -+#define D3DCOMPILE_EFFECT_ALLOW_SLOW_OPS 0x00000002 -+ -+#define D3DCOMPILE_FLAGS2_FORCE_ROOT_SIGNATURE_LATEST 0x00000000 -+#define D3DCOMPILE_FLAGS2_FORCE_ROOT_SIGNATURE_1_0 0x00000010 -+#define D3DCOMPILE_FLAGS2_FORCE_ROOT_SIGNATURE_1_1 0x00000020 -+ -+#define 
D3DCOMPILE_SECDATA_MERGE_UAV_SLOTS 0x00000001 -+#define D3DCOMPILE_SECDATA_PRESERVE_TEMPLATE_SLOTS 0x00000002 -+#define D3DCOMPILE_SECDATA_REQUIRE_TEMPLATE_MATCH 0x00000004 -+ -+HRESULT WINAPI D3DCompile(const void *data, SIZE_T data_size, const char *filename, -+ const D3D_SHADER_MACRO *macros, ID3DInclude *include, const char *entrypoint, -+ const char *profile, UINT flags, UINT effect_flags, ID3DBlob **shader, ID3DBlob **error_messages); -+HRESULT WINAPI D3DCompile2(const void *data, SIZE_T data_size, const char *filename, -+ const D3D_SHADER_MACRO *macros, ID3DInclude *include, const char *entrypoint, -+ const char *profile, UINT flags, UINT effect_flags, UINT secondary_flags, -+ const void *secondary_data, SIZE_T secondary_data_size, ID3DBlob **shader, -+ ID3DBlob **error_messages); -+HRESULT WINAPI D3DCreateBlob(SIZE_T size, ID3DBlob **blob); -+HRESULT WINAPI D3DPreprocess(const void *data, SIZE_T size, const char *filename, const D3D_SHADER_MACRO *macros, -+ ID3DInclude *include, ID3DBlob **shader, ID3DBlob **error_messages); -+ -+#endif /* __D3DCOMPILER_H__ */ -+#endif /* __VKD3D_D3DCOMPILER_H */ -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 859b8c79792..01356ce3931 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -49,6 +49,8 @@ enum vkd3d_shader_api_version - VKD3D_SHADER_API_VERSION_1_5, - VKD3D_SHADER_API_VERSION_1_6, - VKD3D_SHADER_API_VERSION_1_7, -+ VKD3D_SHADER_API_VERSION_1_8, -+ VKD3D_SHADER_API_VERSION_1_9, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION), - }; -@@ -84,6 +86,16 @@ enum vkd3d_shader_structure_type - * \since 1.3 - */ - VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO, -+ /** -+ * The structure is a vkd3d_shader_scan_signature_info structure. -+ * \since 1.9 -+ */ -+ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO, -+ /** -+ * The structure is a vkd3d_shader_varying_map_info structure. 
-+ * \since 1.9 -+ */ -+ VKD3D_SHADER_STRUCTURE_TYPE_VARYING_MAP_INFO, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), - }; -@@ -133,6 +145,15 @@ enum vkd3d_shader_compile_option_formatting_flags - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), - }; - -+/** Determines how matrices are stored. \since 1.9 */ -+enum vkd3d_shader_compile_option_pack_matrix_order -+{ -+ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR = 0x00000001, -+ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR = 0x00000002, -+ -+ VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER), -+}; -+ - enum vkd3d_shader_compile_option_name - { - /** -@@ -163,6 +184,15 @@ enum vkd3d_shader_compile_option_name - * \since 1.7 - */ - VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE = 0x00000006, -+ /** -+ * This option specifies default matrix packing order for HLSL sources. -+ * Explicit variable modifiers or pragmas will take precedence. -+ * -+ * \a value is a member of enum vkd3d_shader_compile_option_pack_matrix_order. -+ * -+ * \since 1.9 -+ */ -+ VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER = 0x00000007, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), - }; -@@ -326,6 +356,25 @@ struct vkd3d_shader_parameter - } u; - }; - -+/** -+ * Symbolic register indices for mapping uniform constant register sets in -+ * legacy Direct3D bytecode to constant buffer views in the target environment. -+ * -+ * Members of this enumeration are used in -+ * \ref vkd3d_shader_resource_binding.register_index. -+ * -+ * \since 1.9 -+ */ -+enum vkd3d_shader_d3dbc_constant_register -+{ -+ /** The float constant register set, c# in Direct3D assembly. */ -+ VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER = 0x0, -+ /** The integer constant register set, i# in Direct3D assembly. */ -+ VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER = 0x1, -+ /** The boolean constant register set, b# in Direct3D assembly. 
*/ -+ VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER = 0x2, -+}; -+ - /** - * Describes the mapping of a single resource or resource array to its binding - * point in the target environment. -@@ -350,7 +399,14 @@ struct vkd3d_shader_resource_binding - * support multiple register spaces, this parameter must be set to 0. - */ - unsigned int register_space; -- /** Register index of the DXBC resource. */ -+ /** -+ * Register index of the Direct3D resource. -+ * -+ * For legacy Direct3D shaders, vkd3d-shader maps each constant register -+ * set to a single constant buffer view. This parameter names the register -+ * set to map, and must be a member of -+ * enum vkd3d_shader_d3dbc_constant_register. -+ */ - unsigned int register_index; - /** Shader stage(s) to which the resource is visible. */ - enum vkd3d_shader_visibility shader_visibility; -@@ -610,6 +666,11 @@ enum vkd3d_shader_source_type - * model 1, 2, and 3 shaders. \since 1.3 - */ - VKD3D_SHADER_SOURCE_D3D_BYTECODE, -+ /** -+ * A 'DirectX Intermediate Language' shader embedded in a DXBC container. This is -+ * the format used for Direct3D shader model 6 shaders. \since 1.9 -+ */ -+ VKD3D_SHADER_SOURCE_DXBC_DXIL, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), - }; -@@ -619,7 +680,7 @@ enum vkd3d_shader_target_type - { - /** - * The shader has no type or is to be ignored. This is not a valid value -- * for vkd3d_shader_compile() or vkd3d_shader_scan(). -+ * for vkd3d_shader_compile(). - */ - VKD3D_SHADER_TARGET_NONE, - /** -@@ -1280,6 +1341,8 @@ enum vkd3d_shader_descriptor_info_flag - /** The descriptor is a UAV resource, on which the shader performs - * atomic ops. \since 1.6 */ - VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS = 0x00000008, -+ /** The descriptor is a raw (byte-addressed) buffer. 
\since 1.9 */ -+ VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER = 0x00000010, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_DESCRIPTOR_INFO_FLAG), - }; -@@ -1319,6 +1382,20 @@ struct vkd3d_shader_descriptor_info - * A chained structure enumerating the descriptors declared by a shader. - * - * This structure extends vkd3d_shader_compile_info. -+ * -+ * When scanning a legacy Direct3D shader, vkd3d-shader enumerates each -+ * constant register set used by the shader as a single constant buffer -+ * descriptor, as follows: -+ * - The \ref vkd3d_shader_descriptor_info.type field is set to -+ * VKD3D_SHADER_DESCRIPTOR_TYPE_CBV. -+ * - The \ref vkd3d_shader_descriptor_info.register_space field is set to zero. -+ * - The \ref vkd3d_shader_descriptor_info.register_index field is set to a -+ * member of enum vkd3d_shader_d3dbc_constant_register denoting which set -+ * is used. -+ * - The \ref vkd3d_shader_descriptor_info.count field is set to one. -+ * -+ * In summary, there may be up to three such descriptors, one for each register -+ * set used by the shader: float, integer, and boolean. - */ - struct vkd3d_shader_scan_descriptor_info - { -@@ -1388,6 +1465,24 @@ enum vkd3d_shader_sysval_semantic - VKD3D_SHADER_SV_TESS_FACTOR_TRIINT = 0x0e, - VKD3D_SHADER_SV_TESS_FACTOR_LINEDET = 0x0f, - VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN = 0x10, -+ /** Render target; SV_Target in Direct3D. \since 1.9 */ -+ VKD3D_SHADER_SV_TARGET = 0x40, -+ /** Depth; SV_Depth in Direct3D. \since 1.9 */ -+ VKD3D_SHADER_SV_DEPTH = 0x41, -+ /** Sample mask; SV_Coverage in Direct3D. \since 1.9 */ -+ VKD3D_SHADER_SV_COVERAGE = 0x42, -+ /** -+ * Depth, which is guaranteed to be greater than or equal to the current -+ * depth; SV_DepthGreaterEqual in Direct3D. \since 1.9 -+ */ -+ VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL = 0x43, -+ /** -+ * Depth, which is guaranteed to be less than or equal to the current -+ * depth; SV_DepthLessEqual in Direct3D. 
\since 1.9 -+ */ -+ VKD3D_SHADER_SV_DEPTH_LESS_EQUAL = 0x44, -+ /** Stencil reference; SV_StencilRef in Direct3D. \since 1.9 */ -+ VKD3D_SHADER_SV_STENCIL_REF = 0x45, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SYSVAL_SEMANTIC), - }; -@@ -1550,6 +1645,132 @@ static inline uint32_t vkd3d_shader_create_swizzle(enum vkd3d_shader_swizzle_com - | ((w & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(3)); - } - -+/** -+ * A chained structure containing descriptions of shader inputs and outputs. -+ * -+ * This structure is currently implemented only for DXBC and legacy D3D bytecode -+ * source types. -+ * For DXBC shaders, the returned information is parsed directly from the -+ * signatures embedded in the DXBC shader. -+ * For legacy D3D shaders, the returned information is synthesized based on -+ * registers declared or used by shader instructions. -+ * For all other shader types, the structure is zeroed. -+ * -+ * All members (except for \ref type and \ref next) are output-only. -+ * -+ * This structure is passed to vkd3d_shader_scan() and extends -+ * vkd3d_shader_compile_info. -+ * -+ * Members of this structure are allocated by vkd3d-shader and should be freed -+ * with vkd3d_shader_free_scan_signature_info() when no longer needed. -+ * -+ * All signatures may contain pointers into the input shader, and should only -+ * be accessed while the input shader remains valid. -+ * -+ * Signature elements are synthesized from legacy Direct3D bytecode as follows: -+ * - The \ref vkd3d_shader_signature_element.semantic_name field is set to an -+ * uppercase string corresponding to the HLSL name for the usage, e.g. -+ * "POSITION", "BLENDWEIGHT", "COLOR", "PSIZE", etc. -+ * - The \ref vkd3d_shader_signature_element.semantic_index field is set to the -+ * usage index. -+ * - The \ref vkd3d_shader_signature_element.stream_index is always 0. 
-+ * -+ * Signature elements are synthesized for any input or output register declared -+ * or used in a legacy Direct3D bytecode shader, including the following: -+ * - Shader model 1 and 2 colour and texture coordinate registers. -+ * - The shader model 1 pixel shader output register. -+ * - Shader model 1 and 2 vertex shader output registers (position, fog, and -+ * point size). -+ * - Shader model 3 pixel shader system value input registers (pixel position -+ * and face). -+ * -+ * \since 1.9 -+ */ -+struct vkd3d_shader_scan_signature_info -+{ -+ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO. */ -+ enum vkd3d_shader_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ -+ const void *next; -+ -+ /** The shader input varyings. */ -+ struct vkd3d_shader_signature input; -+ -+ /** The shader output varyings. */ -+ struct vkd3d_shader_signature output; -+ -+ /** The shader patch constant varyings. */ -+ struct vkd3d_shader_signature patch_constant; -+}; -+ -+/** -+ * Describes the mapping of a output varying register in a shader stage, -+ * to an input varying register in the following shader stage. -+ * -+ * This structure is used in struct vkd3d_shader_varying_map_info. -+ */ -+struct vkd3d_shader_varying_map -+{ -+ /** -+ * The signature index (in the output signature) of the output varying. -+ * If greater than or equal to the number of elements in the output -+ * signature, signifies that the varying is consumed by the next stage but -+ * not written by this one. -+ */ -+ unsigned int output_signature_index; -+ /** The register index of the input varying to map this register to. */ -+ unsigned int input_register_index; -+ /** The mask consumed by the destination register. */ -+ unsigned int input_mask; -+}; -+ -+/** -+ * A chained structure which describes how output varyings in this shader stage -+ * should be mapped to input varyings in the next stage. -+ * -+ * This structure is optional. 
It should not be provided if there is no shader -+ * stage. -+ * However, depending on the input and output formats, this structure may be -+ * necessary in order to generate shaders which correctly match each other. -+ * -+ * If this structure is absent, vkd3d-shader will map varyings from one stage -+ * to another based on their register index. -+ * For Direct3D shader model 3.0, such a default mapping will be incorrect -+ * unless the registers are allocated in the same order, and hence this -+ * field is necessary to correctly match inter-stage varyings. -+ * This mapping may also be necessary under other circumstances where the -+ * varying interface does not match exactly. -+ * -+ * This structure is passed to vkd3d_shader_compile() and extends -+ * vkd3d_shader_compile_info. -+ * -+ * This structure contains only input parameters. -+ * -+ * \since 1.9 -+ */ -+struct vkd3d_shader_varying_map_info -+{ -+ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_VARYING_MAP_INFO. */ -+ enum vkd3d_shader_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ -+ const void *next; -+ -+ /** -+ * A mapping of output varyings in this shader stage to input varyings -+ * in the next shader stage. -+ * -+ * This mapping should include exactly one element for each varying -+ * consumed by the next shader stage. -+ * If this shader stage outputs a varying that is not consumed by the next -+ * shader stage, that varying should be absent from this array. -+ * -+ * This mapping may be constructed by vkd3d_shader_build_varying_map(). -+ */ -+ const struct vkd3d_shader_varying_map *varying_map; -+ /** The number of registers provided in \ref varying_map. 
*/ -+ unsigned int varying_count; -+}; -+ - #ifdef LIBVKD3D_SHADER_SOURCE - # define VKD3D_SHADER_API VKD3D_EXPORT - #else -@@ -1622,12 +1843,14 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported - * - * Depending on the source and target types, this function may support the - * following chained structures: -+ * - vkd3d_shader_hlsl_source_info - * - vkd3d_shader_interface_info -+ * - vkd3d_shader_varying_map_info - * - vkd3d_shader_scan_descriptor_info -+ * - vkd3d_shader_scan_signature_info - * - vkd3d_shader_spirv_domain_shader_target_info - * - vkd3d_shader_spirv_target_info - * - vkd3d_shader_transform_feedback_info -- * - vkd3d_shader_hlsl_source_info - * - * \param compile_info A chained structure containing compilation parameters. - * -@@ -1783,6 +2006,26 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver - * Parse shader source code or byte code, returning various types of requested - * information. - * -+ * The \a source_type member of \a compile_info must be set to the type of the -+ * shader. -+ * -+ * The \a target_type member may be set to VKD3D_SHADER_TARGET_NONE, in which -+ * case vkd3d_shader_scan() will return information about the shader in -+ * isolation. Alternatively, it may be set to a valid compilation target for the -+ * shader, in which case vkd3d_shader_scan() will return information that -+ * reflects the interface for a shader as it will be compiled to that target. -+ * In this case other chained structures may be appended to \a compile_info as -+ * they would be passed to vkd3d_shader_compile(), and interpreted accordingly, -+ * such as vkd3d_shader_spirv_target_info. -+ * -+ * (For a hypothetical example, suppose the source shader distinguishes float -+ * and integer texture data, but the target environment does not support integer -+ * textures. In this case vkd3d_shader_compile() might translate integer -+ * operations to float. 
Accordingly using VKD3D_SHADER_TARGET_NONE would -+ * accurately report whether the texture expects integer or float data, but -+ * using the relevant specific target type would report -+ * VKD3D_SHADER_RESOURCE_DATA_FLOAT.) -+ * - * Currently this function supports the following code types: - * - VKD3D_SHADER_SOURCE_DXBC_TPF - * -@@ -1790,6 +2033,7 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver - * \n - * The DXBC_TPF scanner supports the following chained structures: - * - vkd3d_shader_scan_descriptor_info -+ * - vkd3d_shader_scan_signature_info - * \n - * Although the \a compile_info parameter is read-only, chained structures - * passed to this function need not be, and may serve as output parameters, -@@ -1826,12 +2070,18 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_descriptor_info( - struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info); - - /** -- * Read the input signature of a compiled shader, returning a structural -+ * Read the input signature of a compiled DXBC shader, returning a structural - * description which can be easily parsed by C code. - * - * This function parses a compiled shader. To parse a standalone root signature, - * use vkd3d_shader_parse_root_signature(). - * -+ * This function only parses DXBC shaders, and only retrieves the input -+ * signature. To retrieve signatures from other shader types, or other signature -+ * types, use vkd3d_shader_scan() and struct vkd3d_shader_scan_signature_info. -+ * This function returns the same input signature that is returned in -+ * struct vkd3d_shader_scan_signature_info. -+ * - * \param dxbc Compiled byte code, in DXBC format. 
- * - * \param signature Output location in which the parsed root signature will be -@@ -2021,6 +2271,48 @@ VKD3D_SHADER_API int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxb - VKD3D_SHADER_API int vkd3d_shader_serialize_dxbc(size_t section_count, - const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); - -+/** -+ * Free members of struct vkd3d_shader_scan_signature_info allocated by -+ * vkd3d_shader_scan(). -+ * -+ * This function may free members of vkd3d_shader_scan_signature_info, but -+ * does not free the structure itself. -+ * -+ * \param info Scan information to free. -+ * -+ * \since 1.9 -+ */ -+VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info); -+ -+/** -+ * Build a mapping of output varyings in a shader stage to input varyings in -+ * the following shader stage. -+ * -+ * This mapping should be used in struct vkd3d_shader_varying_map_info to -+ * compile the first shader. -+ * -+ * \param output_signature The output signature of the first shader. -+ * -+ * \param input_signature The input signature of the second shader. -+ * -+ * \param count On output, contains the number of entries written into -+ * \ref varyings. -+ * -+ * \param varyings Pointer to an output array of varyings. -+ * This must point to space for N varyings, where N is the number of elements -+ * in the input signature. -+ * -+ * \remark Valid legacy Direct3D pixel shaders have at most 12 varying inputs: -+ * 10 inter-stage varyings, face, and position. -+ * Therefore, in practice, it is safe to call this function with a -+ * pre-allocated array with a fixed size of 12. 
-+ * -+ * \since 1.9 -+ */ -+VKD3D_SHADER_API void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, -+ const struct vkd3d_shader_signature *input_signature, -+ unsigned int *count, struct vkd3d_shader_varying_map *varyings); -+ - #endif /* VKD3D_SHADER_NO_PROTOTYPES */ - - /** Type of vkd3d_shader_get_version(). */ -@@ -2086,6 +2378,13 @@ typedef int (*PFN_vkd3d_shader_parse_dxbc)(const struct vkd3d_shader_code *dxbc, - typedef int (*PFN_vkd3d_shader_serialize_dxbc)(size_t section_count, - const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); - -+/** Type of vkd3d_shader_build_varying_map(). \since 1.9 */ -+typedef void (*PFN_vkd3d_shader_build_varying_map)(const struct vkd3d_shader_signature *output_signature, -+ const struct vkd3d_shader_signature *input_signature, -+ unsigned int *count, struct vkd3d_shader_varying_map *varyings); -+/** Type of vkd3d_shader_free_scan_signature_info(). \since 1.9 */ -+typedef void (*PFN_vkd3d_shader_free_scan_signature_info)(struct vkd3d_shader_scan_signature_info *info); -+ - #ifdef __cplusplus - } - #endif /* __cplusplus */ -diff --git a/libs/vkd3d/include/vkd3d_utils.h b/libs/vkd3d/include/vkd3d_utils.h -new file mode 100644 -index 00000000000..e8462563576 ---- /dev/null -+++ b/libs/vkd3d/include/vkd3d_utils.h -@@ -0,0 +1,108 @@ -+/* -+ * Copyright 2016 Józef Kucia for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#ifndef __VKD3D_UTILS_H -+#define __VKD3D_UTILS_H -+ -+#include -+ -+#ifndef VKD3D_UTILS_API_VERSION -+#define VKD3D_UTILS_API_VERSION VKD3D_API_VERSION_1_0 -+#endif -+ -+#ifdef __cplusplus -+extern "C" { -+#endif /* __cplusplus */ -+ -+/** -+ * \file vkd3d_utils.h -+ * -+ * This file contains definitions for the vkd3d-utils library. -+ * -+ * The vkd3d-utils library is a collections of routines to ease the -+ * porting of a Direct3D 12 application to vkd3d. -+ * -+ * \since 1.0 -+ */ -+ -+#define VKD3D_WAIT_OBJECT_0 (0) -+#define VKD3D_WAIT_TIMEOUT (1) -+#define VKD3D_WAIT_FAILED (~0u) -+#define VKD3D_INFINITE (~0u) -+ -+#ifdef LIBVKD3D_UTILS_SOURCE -+# define VKD3D_UTILS_API VKD3D_EXPORT -+#else -+# define VKD3D_UTILS_API VKD3D_IMPORT -+#endif -+ -+/* 1.0 */ -+VKD3D_UTILS_API HANDLE vkd3d_create_event(void); -+VKD3D_UTILS_API HRESULT vkd3d_signal_event(HANDLE event); -+VKD3D_UTILS_API unsigned int vkd3d_wait_event(HANDLE event, unsigned int milliseconds); -+VKD3D_UTILS_API void vkd3d_destroy_event(HANDLE event); -+ -+#define D3D12CreateDevice(a, b, c, d) D3D12CreateDeviceVKD3D(a, b, c, d, VKD3D_UTILS_API_VERSION) -+VKD3D_UTILS_API HRESULT WINAPI D3D12CreateRootSignatureDeserializer( -+ const void *data, SIZE_T data_size, REFIID iid, void **deserializer); -+VKD3D_UTILS_API HRESULT WINAPI D3D12GetDebugInterface(REFIID iid, void **debug); -+VKD3D_UTILS_API HRESULT WINAPI D3D12SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC *desc, -+ D3D_ROOT_SIGNATURE_VERSION version, ID3DBlob **blob, ID3DBlob **error_blob); -+ -+/* 1.2 */ -+VKD3D_UTILS_API HRESULT WINAPI D3D12CreateDeviceVKD3D(IUnknown *adapter, D3D_FEATURE_LEVEL feature_level, -+ REFIID iid, void **device, enum vkd3d_api_version api_version); -+VKD3D_UTILS_API HRESULT 
WINAPI D3D12CreateVersionedRootSignatureDeserializer(const void *data, -+ SIZE_T data_size, REFIID iid, void **deserializer); -+VKD3D_UTILS_API HRESULT WINAPI D3D12SerializeVersionedRootSignature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc, -+ ID3DBlob **blob, ID3DBlob **error_blob); -+ -+/* 1.3 */ -+VKD3D_UTILS_API HRESULT WINAPI D3DCompile(const void *data, SIZE_T data_size, const char *filename, -+ const D3D_SHADER_MACRO *defines, ID3DInclude *include, const char *entrypoint, -+ const char *target, UINT flags, UINT effect_flags, ID3DBlob **shader, ID3DBlob **error_messages); -+VKD3D_UTILS_API HRESULT WINAPI D3DCompile2(const void *data, SIZE_T data_size, const char *filename, -+ const D3D_SHADER_MACRO *defines, ID3DInclude *include, const char *entrypoint, -+ const char *target, UINT flags, UINT effect_flags, UINT secondary_flags, -+ const void *secondary_data, SIZE_T secondary_data_size, ID3DBlob **shader, -+ ID3DBlob **error_messages); -+VKD3D_UTILS_API HRESULT WINAPI D3DCreateBlob(SIZE_T data_size, ID3DBlob **blob); -+VKD3D_UTILS_API HRESULT WINAPI D3DPreprocess(const void *data, SIZE_T size, const char *filename, -+ const D3D_SHADER_MACRO *defines, ID3DInclude *include, -+ ID3DBlob **shader, ID3DBlob **error_messages); -+ -+/** -+ * Set a callback to be called when vkd3d-utils outputs debug logging. -+ * -+ * If NULL, or if this function has not been called, libvkd3d-utils will print -+ * all enabled log output to stderr. -+ * -+ * Calling this function will also set the log callback for libvkd3d and -+ * libvkd3d-shader. -+ * -+ * \param callback Callback function to set. 
-+ * -+ * \since 1.4 -+ */ -+VKD3D_UTILS_API void vkd3d_utils_set_log_callback(PFN_vkd3d_log callback); -+ -+#ifdef __cplusplus -+} -+#endif /* __cplusplus */ -+ -+#endif /* __VKD3D_UTILS_H */ -diff --git a/libs/vkd3d/include/vkd3d_windows.h b/libs/vkd3d/include/vkd3d_windows.h -new file mode 100644 -index 00000000000..7b0e972d828 ---- /dev/null -+++ b/libs/vkd3d/include/vkd3d_windows.h -@@ -0,0 +1,289 @@ -+/* -+ * Copyright 2016 Józef Kucia for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#ifndef __VKD3D_WINDOWS_H -+#define __VKD3D_WINDOWS_H -+#ifndef _INC_WINDOWS -+ -+/* Nameless unions */ -+#ifndef __C89_NAMELESS -+# ifdef NONAMELESSUNION -+# define __C89_NAMELESS -+# define __C89_NAMELESSUNIONNAME u -+# else -+# define __C89_NAMELESS -+# define __C89_NAMELESSUNIONNAME -+# endif /* NONAMELESSUNION */ -+#endif /* __C89_NAMELESS */ -+ -+#if !defined(_WIN32) || defined(__WIDL__) -+ -+# if !defined(__WIDL__) -+# if !defined(VKD3D_WIN32_WCHAR) -+# include -+# endif -+# include -+# endif -+ -+# ifdef __GNUC__ -+# define DECLSPEC_ALIGN(x) __attribute__((aligned(x))) -+# endif -+ -+/* HRESULT */ -+typedef int HRESULT; -+# define SUCCEEDED(hr) ((HRESULT)(hr) >= 0) -+# define FAILED(hr) ((HRESULT)(hr) < 0) -+ -+# define _HRESULT_TYPEDEF_(x) 
((HRESULT)x) -+ -+# define S_OK _HRESULT_TYPEDEF_(0) -+# define S_FALSE _HRESULT_TYPEDEF_(1) -+ -+# define E_NOTIMPL _HRESULT_TYPEDEF_(0x80004001) -+# define E_NOINTERFACE _HRESULT_TYPEDEF_(0x80004002) -+# define E_POINTER _HRESULT_TYPEDEF_(0x80004003) -+# define E_ABORT _HRESULT_TYPEDEF_(0x80004004) -+# define E_FAIL _HRESULT_TYPEDEF_(0x80004005) -+# define E_OUTOFMEMORY _HRESULT_TYPEDEF_(0x8007000E) -+# define E_INVALIDARG _HRESULT_TYPEDEF_(0x80070057) -+ -+# define DXGI_ERROR_NOT_FOUND _HRESULT_TYPEDEF_(0x887a0002) -+# define DXGI_ERROR_MORE_DATA _HRESULT_TYPEDEF_(0x887a0003) -+# define DXGI_ERROR_UNSUPPORTED _HRESULT_TYPEDEF_(0x887a0004) -+ -+# define D3DERR_INVALIDCALL _HRESULT_TYPEDEF_(0x8876086c) -+ -+/* Basic types */ -+typedef unsigned char BYTE; -+typedef unsigned int DWORD; -+typedef int INT; -+typedef unsigned int UINT; -+typedef int LONG; -+typedef unsigned int ULONG; -+typedef float FLOAT; -+typedef LONG BOOL; -+ -+/* Assuming LP64 model */ -+typedef char INT8; -+typedef unsigned char UINT8; -+typedef short INT16; -+typedef unsigned short UINT16; -+typedef int INT32; -+typedef unsigned int UINT32; -+# if defined(__WIDL__) -+typedef __int64 INT64; -+typedef unsigned __int64 UINT64; -+# else -+typedef int64_t DECLSPEC_ALIGN(8) INT64; -+typedef uint64_t DECLSPEC_ALIGN(8) UINT64; -+# endif -+typedef INT64 LONG64; -+typedef long LONG_PTR; -+typedef unsigned long ULONG_PTR; -+ -+typedef ULONG_PTR SIZE_T; -+ -+# ifdef VKD3D_WIN32_WCHAR -+typedef unsigned short WCHAR; -+# else -+typedef wchar_t WCHAR; -+# endif /* VKD3D_WIN32_WCHAR */ -+typedef void *HANDLE; -+ -+/* GUID */ -+# ifdef __WIDL__ -+typedef struct -+{ -+ unsigned long Data1; -+ unsigned short Data2; -+ unsigned short Data3; -+ unsigned char Data4[8]; -+} GUID; -+# else -+typedef struct _GUID -+{ -+ unsigned int Data1; -+ unsigned short Data2; -+ unsigned short Data3; -+ unsigned char Data4[8]; -+} GUID; -+# endif -+ -+typedef GUID IID; -+typedef GUID CLSID; -+typedef GUID UUID; -+ -+# ifdef 
INITGUID -+# ifndef __cplusplus -+# define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ -+ const GUID name DECLSPEC_HIDDEN; \ -+ const GUID name = \ -+ { l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 }} -+# else -+# define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ -+ EXTERN_C const GUID name DECLSPEC_HIDDEN; \ -+ EXTERN_C const GUID name = \ -+ { l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 }} -+# endif -+# else -+# define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ -+ EXTERN_C const GUID name DECLSPEC_HIDDEN; -+# endif /* INITGUID */ -+ -+/* __uuidof emulation */ -+#if defined(__cplusplus) && !defined(_MSC_VER) -+ -+extern "C++" -+{ -+ template const GUID &__vkd3d_uuidof(); -+} -+ -+# define __CRT_UUID_DECL(type, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ -+ extern "C++" \ -+ { \ -+ template<> inline const GUID &__vkd3d_uuidof() \ -+ { \ -+ static const IID __uuid_inst = {l, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}; \ -+ return __uuid_inst; \ -+ } \ -+ template<> inline const GUID &__vkd3d_uuidof() \ -+ { \ -+ return __vkd3d_uuidof(); \ -+ } \ -+ } -+ -+# define __uuidof(type) __vkd3d_uuidof() -+#else -+# define __CRT_UUID_DECL(type, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) -+#endif /* defined(__cplusplus) && !defined(_MSC_VER) */ -+ -+typedef struct SECURITY_ATTRIBUTES SECURITY_ATTRIBUTES; -+#endif /* !defined(_WIN32) || defined(__WIDL__) */ -+ -+ -+#ifndef _WIN32 -+# include -+# include -+# include -+ -+# define COM_NO_WINDOWS_H -+ -+# define FORCEINLINE inline -+ -+# define CONTAINING_RECORD(address, type, field) \ -+ ((type *)((char *)(address) - offsetof(type, field))) -+ -+# ifdef __x86_64__ -+# define __stdcall __attribute__((ms_abi)) -+# else -+# if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 2)) || defined(__APPLE__) -+# define __stdcall __attribute__((__stdcall__)) __attribute__((__force_align_arg_pointer__)) -+# else -+# define __stdcall __attribute__((__stdcall__)) -+# endif -+# 
endif -+ -+# define WINAPI __stdcall -+# define STDMETHODCALLTYPE __stdcall -+ -+# ifdef __GNUC__ -+# define DECLSPEC_SELECTANY __attribute__((weak)) -+# endif -+ -+/* Macros for COM interfaces */ -+# define interface struct -+# define BEGIN_INTERFACE -+# define END_INTERFACE -+# define MIDL_INTERFACE(x) struct -+ -+# ifdef __cplusplus -+# define EXTERN_C extern "C" -+# else -+# define EXTERN_C extern -+# endif -+ -+# define CONST_VTBL const -+ -+# define TRUE 1 -+# define FALSE 0 -+ -+# if defined(__cplusplus) && !defined(CINTERFACE) -+# define REFIID const IID & -+# define REFGUID const GUID & -+# define REFCLSID const CLSID & -+# else -+# define REFIID const IID * const -+# define REFGUID const GUID * const -+# define REFCLSID const CLSID * const -+# endif -+ -+#if defined(__cplusplus) && !defined(CINTERFACE) -+# define IsEqualGUID(guid1, guid2) (!memcmp(&(guid1), &(guid2), sizeof(GUID))) -+#else -+# define IsEqualGUID(guid1, guid2) (!memcmp(guid1, guid2, sizeof(GUID))) -+#endif -+ -+#elif !defined(__WIDL__) -+ -+# include -+ -+#endif /* _WIN32 */ -+ -+ -+/* Define DECLSPEC_HIDDEN */ -+#ifndef DECLSPEC_HIDDEN -+# if defined(__MINGW32__) -+# define DECLSPEC_HIDDEN -+# elif defined(__GNUC__) -+# define DECLSPEC_HIDDEN __attribute__((visibility("hidden"))) -+# else -+# define DECLSPEC_HIDDEN -+# endif -+#endif /* DECLSPEC_HIDDEN */ -+ -+/* Define min() & max() macros */ -+#ifndef NOMINMAX -+# ifndef min -+# define min(a, b) (((a) <= (b)) ? (a) : (b)) -+# endif -+ -+# ifndef max -+# define max(a, b) (((a) >= (b)) ? 
(a) : (b)) -+# endif -+#endif /* NOMINMAX */ -+ -+#ifndef DEFINE_ENUM_FLAG_OPERATORS -+#ifdef __cplusplus -+# define DEFINE_ENUM_FLAG_OPERATORS(type) \ -+extern "C++" \ -+{ \ -+ inline type operator &(type x, type y) { return (type)((int)x & (int)y); } \ -+ inline type operator &=(type &x, type y) { return (type &)((int &)x &= (int)y); } \ -+ inline type operator ~(type x) { return (type)~(int)x; } \ -+ inline type operator |(type x, type y) { return (type)((int)x | (int)y); } \ -+ inline type operator |=(type &x, type y) { return (type &)((int &)x |= (int)y); } \ -+ inline type operator ^(type x, type y) { return (type)((int)x ^ (int)y); } \ -+ inline type operator ^=(type &x, type y) { return (type &)((int &)x ^= (int)y); } \ -+} -+#else -+# define DEFINE_ENUM_FLAG_OPERATORS(type) -+#endif -+#endif /* DEFINE_ENUM_FLAG_OPERATORS */ -+ -+#endif /* _INC_WINDOWS */ -+#endif /* __VKD3D_WINDOWS_H */ -diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c -index 30205088b1b..ce00e536d39 100644 ---- a/libs/vkd3d/libs/vkd3d-common/blob.c -+++ b/libs/vkd3d/libs/vkd3d-common/blob.c -@@ -17,6 +17,7 @@ - */ - - #define COBJMACROS -+ - #include "vkd3d.h" - #include "vkd3d_blob.h" - #include "vkd3d_debug.h" -diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c -index 499334a35f1..aa7df5bd764 100644 ---- a/libs/vkd3d/libs/vkd3d-common/debug.c -+++ b/libs/vkd3d/libs/vkd3d-common/debug.c -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include - #ifdef HAVE_PTHREAD_H - #include - #endif -@@ -40,15 +41,15 @@ - #define VKD3D_DEBUG_BUFFER_COUNT 64 - #define VKD3D_DEBUG_BUFFER_SIZE 512 - --extern const char *vkd3d_dbg_env_name; -+extern const char *const vkd3d_dbg_env_name; - --static const char *debug_level_names[] = -+static const char *const debug_level_names[] = - { -- /* VKD3D_DBG_LEVEL_NONE */ "none", -- /* VKD3D_DBG_LEVEL_ERR */ "err", -- /* VKD3D_DBG_LEVEL_FIXME */ "fixme", -- /* VKD3D_DBG_LEVEL_WARN */ 
"warn", -- /* VKD3D_DBG_LEVEL_TRACE */ "trace", -+ [VKD3D_DBG_LEVEL_NONE ] = "none", -+ [VKD3D_DBG_LEVEL_ERR ] = "err", -+ [VKD3D_DBG_LEVEL_FIXME] = "fixme", -+ [VKD3D_DBG_LEVEL_WARN ] = "warn", -+ [VKD3D_DBG_LEVEL_TRACE] = "trace", - }; - - enum vkd3d_dbg_level vkd3d_dbg_get_level(void) -@@ -105,7 +106,13 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch - - assert(level < ARRAY_SIZE(debug_level_names)); - -+#ifdef _WIN32 -+ vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); -+#elif HAVE_GETTID -+ vkd3d_dbg_output("vkd3d:%u:%s:%s ", gettid(), debug_level_names[level], function); -+#else - vkd3d_dbg_output("vkd3d:%s:%s ", debug_level_names[level], function); -+#endif - va_start(args, fmt); - vkd3d_dbg_voutput(fmt, args); - va_end(args); -diff --git a/libs/vkd3d/libs/vkd3d-shader/trace.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -similarity index 97% -rename from libs/vkd3d/libs/vkd3d-shader/trace.c -rename to libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 6cd2dcb270c..f0c386f1b3a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/trace.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -109,6 +109,7 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_DEQ ] = "deq", - [VKD3DSIH_DFMA ] = "dfma", - [VKD3DSIH_DGE ] = "dge", -+ [VKD3DSIH_DISCARD ] = "discard", - [VKD3DSIH_DIV ] = "div", - [VKD3DSIH_DLT ] = "dlt", - [VKD3DSIH_DMAX ] = "dmax", -@@ -577,17 +578,17 @@ static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, e - { - static const char *const resource_type_names[] = - { -- /* VKD3D_SHADER_RESOURCE_NONE */ "none", -- /* VKD3D_SHADER_RESOURCE_BUFFER */ "buffer", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ "texture1d", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ "texture2d", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ "texture2dms", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ "texture3d", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ "texturecube", -- /* 
VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ "texture1darray", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ "texture2darray", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ "texture2dmsarray", -- /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ "texturecubearray", -+ [VKD3D_SHADER_RESOURCE_NONE ] = "none", -+ [VKD3D_SHADER_RESOURCE_BUFFER ] = "buffer", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_1D ] = "texture1d", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_2D ] = "texture2d", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS ] = "texture2dms", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_3D ] = "texture3d", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE ] = "texturecube", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY ] = "texture1darray", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY ] = "texture2darray", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = "texture2dmsarray", -+ [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = "texturecubearray", - }; - - if (type < ARRAY_SIZE(resource_type_names)) -@@ -600,19 +601,19 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, const - { - static const char *const data_type_names[] = - { -- /* VKD3D_DATA_FLOAT */ "float", -- /* VKD3D_DATA_INT */ "int", -- /* VKD3D_DATA_RESOURCE */ "resource", -- /* VKD3D_DATA_SAMPLER */ "sampler", -- /* VKD3D_DATA_UAV */ "uav", -- /* VKD3D_DATA_UINT */ "uint", -- /* VKD3D_DATA_UNORM */ "unorm", -- /* VKD3D_DATA_SNORM */ "snorm", -- /* VKD3D_DATA_OPAQUE */ "opaque", -- /* VKD3D_DATA_MIXED */ "mixed", -- /* VKD3D_DATA_DOUBLE */ "double", -- /* VKD3D_DATA_CONTINUED */ "", -- /* VKD3D_DATA_UNUSED */ "", -+ [VKD3D_DATA_FLOAT ] = "float", -+ [VKD3D_DATA_INT ] = "int", -+ [VKD3D_DATA_RESOURCE ] = "resource", -+ [VKD3D_DATA_SAMPLER ] = "sampler", -+ [VKD3D_DATA_UAV ] = "uav", -+ [VKD3D_DATA_UINT ] = "uint", -+ [VKD3D_DATA_UNORM ] = "unorm", -+ [VKD3D_DATA_SNORM ] = "snorm", -+ [VKD3D_DATA_OPAQUE ] = "opaque", -+ [VKD3D_DATA_MIXED ] = "mixed", -+ [VKD3D_DATA_DOUBLE ] = "double", -+ [VKD3D_DATA_CONTINUED] = "", -+ [VKD3D_DATA_UNUSED ] = "", - 
}; - const char *name; - int i; -@@ -645,7 +646,7 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, - break; - - case VKD3D_SHADER_RESOURCE_TEXTURE_3D: -- shader_addline(buffer, "_3d"); -+ shader_addline(buffer, "_volume"); - break; - - case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: -@@ -660,8 +661,9 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, - else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) - { - if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE) -- shader_addline(buffer, "_resource_"); -+ shader_addline(buffer, "_resource"); - -+ shader_addline(buffer, "_"); - shader_dump_resource_type(compiler, semantic->resource_type); - if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS - || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) -@@ -712,7 +714,7 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, - break; - - case VKD3D_DECL_USAGE_TEXCOORD: -- shader_addline(buffer, "texture%u", semantic->usage_idx); -+ shader_addline(buffer, "texcoord%u", semantic->usage_idx); - break; - - case VKD3D_DECL_USAGE_TANGENT: -@@ -1505,9 +1507,9 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - { - case VKD3DSIH_BREAKP: - case VKD3DSIH_CONTINUEP: -+ case VKD3DSIH_DISCARD: - case VKD3DSIH_IF: - case VKD3DSIH_RETP: -- case VKD3DSIH_TEXKILL: - switch (ins->flags) - { - case VKD3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(buffer, "_nz"); break; -@@ -1857,11 +1859,11 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - shader_addline(buffer, "\n"); - } - --enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out) -+enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vkd3d_shader_instruction_array *instructions, -+ const struct 
vkd3d_shader_version *shader_version, const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out) - { - enum vkd3d_shader_compile_option_formatting_flags formatting; -- struct vkd3d_shader_version *shader_version; - struct vkd3d_d3d_asm_compiler compiler; - enum vkd3d_result result = VKD3D_OK; - struct vkd3d_string_buffer *buffer; -@@ -1919,16 +1921,16 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, - buffer = &compiler.buffer; - vkd3d_string_buffer_init(buffer); - -+ compiler.shader_version = *shader_version; - shader_version = &compiler.shader_version; -- *shader_version = parser->shader_version; - vkd3d_string_buffer_printf(buffer, "%s%s_%u_%u%s\n", compiler.colours.version, - shader_get_type_prefix(shader_version->type), shader_version->major, - shader_version->minor, compiler.colours.reset); - - indent = 0; -- for (i = 0; i < parser->instructions.count; ++i) -+ for (i = 0; i < instructions->count; ++i) - { -- struct vkd3d_shader_instruction *ins = &parser->instructions.elements[i]; -+ struct vkd3d_shader_instruction *ins = &instructions->elements[i]; - - switch (ins->handler_idx) - { -@@ -1981,12 +1983,13 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, - return result; - } - --void vkd3d_shader_trace(struct vkd3d_shader_parser *parser) -+void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, -+ const struct vkd3d_shader_version *shader_version) - { - const char *p, *q, *end; - struct vkd3d_shader_code code; - -- if (vkd3d_dxbc_binary_to_text(parser, NULL, &code) != VKD3D_OK) -+ if (vkd3d_dxbc_binary_to_text(instructions, shader_version, NULL, &code) != VKD3D_OK) - return; - - end = (const char *)code.code + code.size; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index ed81137d225..1fd5ab2446d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ 
-1,4 +1,6 @@ - /* -+ * d3dbc (Direct3D shader models 1-3 bytecode) support -+ * - * Copyright 2002-2003 Jason Edmeades - * Copyright 2002-2003 Raphael Junqueira - * Copyright 2004 Christian Costa -@@ -6,6 +8,7 @@ - * Copyright 2006 Ivan Gyurdiev - * Copyright 2007-2008 Stefan Dösinger for CodeWeavers - * Copyright 2009, 2021 Henri Verbeet for CodeWeavers -+ * Copyright 2019-2020 Zebediah Figura for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public -@@ -22,7 +25,7 @@ - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - --#include "vkd3d_shader_private.h" -+#include "hlsl.h" - - #define VKD3D_SM1_VS 0xfffeu - #define VKD3D_SM1_PS 0xffffu -@@ -207,10 +210,13 @@ struct vkd3d_sm1_opcode_info - struct vkd3d_shader_sm1_parser - { - const struct vkd3d_sm1_opcode_info *opcode_table; -- const uint32_t *start, *end; -+ const uint32_t *start, *end, *ptr; - bool abort; - - struct vkd3d_shader_parser p; -+ -+#define MAX_CONSTANT_COUNT 8192 -+ uint32_t constant_def_mask[3][MAX_CONSTANT_COUNT / 32]; - }; - - /* This table is not order or position dependent. 
*/ -@@ -257,9 +263,9 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = - /* Declarations */ - {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, - /* Constant definitions */ -- {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, -+ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, - {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, -- {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, -+ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, - /* Control flow */ - {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, -@@ -324,9 +330,9 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = - /* Declarations */ - {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, - /* Constant definitions */ -- {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, -+ {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, - {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, -- {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, -+ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, - /* Control flow */ - {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, - {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, -@@ -462,6 +468,7 @@ static void shader_sm1_parse_src_param(uint32_t param, const struct vkd3d_shader - src->reg.idx[1].rel_addr = NULL; - src->reg.idx[2].offset = ~0u; - src->reg.idx[2].rel_addr = NULL; -+ src->reg.idx_count = 1; - src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); - src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; - } -@@ -480,11 +487,315 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader - dst->reg.idx[1].rel_addr = NULL; - dst->reg.idx[2].offset = ~0u; - dst->reg.idx[2].rel_addr = NULL; -+ dst->reg.idx_count = 1; - dst->write_mask = (param & VKD3D_SM1_WRITEMASK_MASK) >> VKD3D_SM1_WRITEMASK_SHIFT; - dst->modifiers = (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT; - dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> 
VKD3D_SM1_DSTSHIFT_SHIFT; - } - -+static struct signature_element *find_signature_element(const struct shader_signature *signature, -+ const char *semantic_name, unsigned int semantic_index) -+{ -+ struct signature_element *e = signature->elements; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ if (!ascii_strcasecmp(e[i].semantic_name, semantic_name) -+ && e[i].semantic_index == semantic_index) -+ return &e[i]; -+ } -+ -+ return NULL; -+} -+ -+static struct signature_element *find_signature_element_by_register_index( -+ const struct shader_signature *signature, unsigned int register_index) -+{ -+ struct signature_element *e = signature->elements; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ if (e[i].register_index == register_index) -+ return &e[i]; -+ } -+ -+ return NULL; -+} -+ -+#define SM1_COLOR_REGISTER_OFFSET 8 -+ -+static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, -+ const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, -+ unsigned int register_index, bool is_dcl, unsigned int mask) -+{ -+ struct shader_signature *signature; -+ struct signature_element *element; -+ -+ if (output) -+ signature = &sm1->p.shader_desc.output_signature; -+ else -+ signature = &sm1->p.shader_desc.input_signature; -+ -+ if ((element = find_signature_element(signature, name, index))) -+ { -+ element->mask |= mask; -+ if (!is_dcl) -+ element->used_mask |= mask; -+ return true; -+ } -+ -+ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, -+ signature->element_count + 1, sizeof(*signature->elements))) -+ return false; -+ element = &signature->elements[signature->element_count++]; -+ -+ element->semantic_name = name; -+ element->semantic_index = index; -+ element->stream_index = 0; -+ element->sysval_semantic = sysval; -+ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ element->register_index = register_index; -+ 
element->target_location = register_index; -+ element->register_count = 1; -+ element->mask = mask; -+ element->used_mask = is_dcl ? 0 : mask; -+ element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; -+ -+ return true; -+} -+ -+static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, -+ unsigned int register_index, unsigned int mask) -+{ -+ struct shader_signature *signature; -+ struct signature_element *element; -+ -+ if (output) -+ signature = &sm1->p.shader_desc.output_signature; -+ else -+ signature = &sm1->p.shader_desc.input_signature; -+ -+ if (!(element = find_signature_element_by_register_index(signature, register_index))) -+ { -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC, -+ "%s register %u was used without being declared.", output ? "Output" : "Input", register_index); -+ return; -+ } -+ -+ element->used_mask |= mask; -+} -+ -+static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, -+ const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) -+{ -+ unsigned int register_index = reg->idx[0].offset; -+ -+ switch (reg->type) -+ { -+ case VKD3DSPR_TEMP: -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL -+ && sm1->p.shader_version.major == 1 && !register_index) -+ return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_TARGET, 0, is_dcl, mask); -+ return true; -+ -+ case VKD3DSPR_INPUT: -+ /* For vertex shaders or sm3 pixel shaders, we should have already -+ * had a DCL instruction. Otherwise, this is a colour input. */ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX || sm1->p.shader_version.major == 3) -+ { -+ add_signature_mask(sm1, false, register_index, mask); -+ return true; -+ } -+ return add_signature_element(sm1, false, "COLOR", register_index, -+ VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); -+ -+ case VKD3DSPR_TEXTURE: -+ /* For vertex shaders, this is ADDR. 
*/ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) -+ return true; -+ return add_signature_element(sm1, false, "TEXCOORD", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ -+ case VKD3DSPR_OUTPUT: -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) -+ { -+ /* For sm < 2 vertex shaders, this is TEXCRDOUT. -+ * -+ * For sm3 vertex shaders, this is OUTPUT, but we already -+ * should have had a DCL instruction. */ -+ if (sm1->p.shader_version.major == 3) -+ { -+ add_signature_mask(sm1, true, register_index, mask); -+ return true; -+ } -+ return add_signature_element(sm1, true, "TEXCOORD", register_index, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); -+ } -+ /* fall through */ -+ -+ case VKD3DSPR_ATTROUT: -+ return add_signature_element(sm1, true, "COLOR", register_index, -+ VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask); -+ -+ case VKD3DSPR_COLOROUT: -+ return add_signature_element(sm1, true, "COLOR", register_index, -+ VKD3D_SHADER_SV_TARGET, register_index, is_dcl, mask); -+ -+ case VKD3DSPR_DEPTHOUT: -+ return add_signature_element(sm1, true, "DEPTH", 0, -+ VKD3D_SHADER_SV_DEPTH, register_index, is_dcl, 0x1); -+ -+ case VKD3DSPR_RASTOUT: -+ switch (register_index) -+ { -+ case 0: -+ return add_signature_element(sm1, true, "POSITION", 0, -+ VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); -+ -+ case 1: -+ return add_signature_element(sm1, true, "FOG", 0, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); -+ -+ case 2: -+ return add_signature_element(sm1, true, "PSIZE", 0, -+ VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); -+ -+ default: -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, -+ "Invalid rasterizer output index %u.", register_index); -+ return true; -+ } -+ -+ case VKD3DSPR_MISCTYPE: -+ switch (register_index) -+ { -+ case 0: -+ return add_signature_element(sm1, false, "VPOS", 0, -+ VKD3D_SHADER_SV_POSITION, 
register_index, is_dcl, mask); -+ -+ case 1: -+ return add_signature_element(sm1, false, "VFACE", 0, -+ VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1); -+ -+ default: -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, -+ "Invalid miscellaneous fragment input index %u.", register_index); -+ return true; -+ } -+ -+ default: -+ return true; -+ } -+} -+ -+static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, -+ const struct vkd3d_shader_semantic *semantic) -+{ -+ const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; -+ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; -+ unsigned int mask = semantic->resource.reg.write_mask; -+ bool output; -+ -+ static const char sm1_semantic_names[][13] = -+ { -+ [VKD3D_DECL_USAGE_POSITION ] = "POSITION", -+ [VKD3D_DECL_USAGE_BLEND_WEIGHT ] = "BLENDWEIGHT", -+ [VKD3D_DECL_USAGE_BLEND_INDICES] = "BLENDINDICES", -+ [VKD3D_DECL_USAGE_NORMAL ] = "NORMAL", -+ [VKD3D_DECL_USAGE_PSIZE ] = "PSIZE", -+ [VKD3D_DECL_USAGE_TEXCOORD ] = "TEXCOORD", -+ [VKD3D_DECL_USAGE_TANGENT ] = "TANGENT", -+ [VKD3D_DECL_USAGE_BINORMAL ] = "BINORMAL", -+ [VKD3D_DECL_USAGE_TESS_FACTOR ] = "TESSFACTOR", -+ [VKD3D_DECL_USAGE_POSITIONT ] = "POSITIONT", -+ [VKD3D_DECL_USAGE_COLOR ] = "COLOR", -+ [VKD3D_DECL_USAGE_FOG ] = "FOG", -+ [VKD3D_DECL_USAGE_DEPTH ] = "DEPTH", -+ [VKD3D_DECL_USAGE_SAMPLE ] = "SAMPLE", -+ }; -+ -+ if (reg->type == VKD3DSPR_OUTPUT) -+ output = true; -+ else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE) -+ output = false; -+ else /* vpos and vface don't have a semantic. */ -+ return add_signature_element_from_register(sm1, reg, true, mask); -+ -+ /* sm2 pixel shaders use DCL but don't provide a semantic. 
*/ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && sm1->p.shader_version.major == 2) -+ return add_signature_element_from_register(sm1, reg, true, mask); -+ -+ /* With the exception of vertex POSITION output, none of these are system -+ * values. Pixel POSITION input is not equivalent to SV_Position; the closer -+ * equivalent is VPOS, which is not declared as a semantic. */ -+ if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX -+ && output && semantic->usage == VKD3D_DECL_USAGE_POSITION) -+ sysval = VKD3D_SHADER_SV_POSITION; -+ -+ return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage], -+ semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); -+} -+ -+static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, -+ enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def) -+{ -+ struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; -+ -+ desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1); -+ if (from_def) -+ { -+ /* d3d shaders have a maximum of 8192 constants; we should not overrun -+ * this array. 
*/ -+ assert((index / 32) <= ARRAY_SIZE(sm1->constant_def_mask[set])); -+ bitmap_set(sm1->constant_def_mask[set], index); -+ } -+} -+ -+static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, -+ const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def) -+{ -+ struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; -+ uint32_t register_index = reg->idx[0].offset; -+ -+ switch (reg->type) -+ { -+ case VKD3DSPR_TEMP: -+ desc->temp_count = max(desc->temp_count, register_index + 1); -+ break; -+ -+ case VKD3DSPR_CONST: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONST2: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048 + register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONST3: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096 + register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONST4: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144 + register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONSTINT: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def); -+ break; -+ -+ case VKD3DSPR_CONSTBOOL: -+ record_constant_register(sm1, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, register_index, from_def); -+ break; -+ -+ default: -+ break; -+ } -+ -+ add_signature_element_from_register(sm1, reg, false, mask); -+} -+ - /* Read a parameter token from the input stream, and possibly a relative - * addressing token. 
*/ - static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, -@@ -635,6 +946,8 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, - range = &semantic->resource.range; - range->space = 0; - range->first = range->last = semantic->resource.reg.reg.idx[0].offset; -+ -+ add_signature_element_from_semantic(sm1, semantic); - } - - static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, -@@ -661,6 +974,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const - src_param->reg.idx[1].rel_addr = NULL; - src_param->reg.idx[2].offset = ~0u; - src_param->reg.idx[2].rel_addr = NULL; -+ src_param->reg.idx_count = 0; - src_param->reg.immconst_type = type; - memcpy(src_param->reg.u.immconst_uint, *ptr, count * sizeof(uint32_t)); - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -@@ -671,7 +985,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const - - static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) - { -- const uint32_t **ptr = &sm1->p.ptr; -+ const uint32_t **ptr = &sm1->ptr; - const char *comment; - unsigned int size; - size_t remaining; -@@ -738,13 +1052,20 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, - } - } - --static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *ins) -+static unsigned int mask_from_swizzle(unsigned int swizzle) -+{ -+ return (1u << vkd3d_swizzle_get_component(swizzle, 0)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 1)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 2)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 3)); -+} -+ -+static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) - { -- struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); - struct vkd3d_shader_src_param *src_params, *predicate; - const struct 
vkd3d_sm1_opcode_info *opcode_info; - struct vkd3d_shader_dst_param *dst_param; -- const uint32_t **ptr = &parser->ptr; -+ const uint32_t **ptr = &sm1->ptr; - uint32_t opcode_token; - const uint32_t *p; - bool predicated; -@@ -758,11 +1079,11 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru - goto fail; - } - -- ++parser->location.line; -+ ++sm1->p.location.line; - opcode_token = read_u32(ptr); - if (!(opcode_info = shader_sm1_get_opcode_info(sm1, opcode_token & VKD3D_SM1_OPCODE_MASK))) - { -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, - "Invalid opcode %#x (token 0x%08x, shader version %u.%u).", - opcode_token & VKD3D_SM1_OPCODE_MASK, opcode_token, - sm1->p.shader_version.major, sm1->p.shader_version.minor); -@@ -775,14 +1096,14 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru - ins->raw = false; - ins->structured = false; - predicated = !!(opcode_token & VKD3D_SM1_INSTRUCTION_PREDICATED); -- ins->predicate = predicate = predicated ? shader_parser_get_src_params(parser, 1) : NULL; -+ ins->predicate = predicate = predicated ? 
shader_parser_get_src_params(&sm1->p, 1) : NULL; - ins->dst_count = opcode_info->dst_count; -- ins->dst = dst_param = shader_parser_get_dst_params(parser, ins->dst_count); -+ ins->dst = dst_param = shader_parser_get_dst_params(&sm1->p, ins->dst_count); - ins->src_count = opcode_info->src_count; -- ins->src = src_params = shader_parser_get_src_params(parser, ins->src_count); -+ ins->src = src_params = shader_parser_get_src_params(&sm1->p, ins->src_count); - if ((!predicate && predicated) || (!src_params && ins->src_count) || (!dst_param && ins->dst_count)) - { -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); -+ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); - goto fail; - } - -@@ -812,22 +1133,28 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_FLOAT); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } - else if (ins->handler_idx == VKD3DSIH_DEFB) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_SCALAR, VKD3D_DATA_UINT); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } - else if (ins->handler_idx == VKD3DSIH_DEFI) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_INT); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } - else - { - /* Destination token */ - if (ins->dst_count) -+ { - shader_sm1_read_dst_param(sm1, &p, dst_param); -+ shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, false); -+ } - - /* Predication token */ - if (ins->predicate) -@@ -835,7 +1162,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser 
*parser, stru - - /* Other source tokens */ - for (i = 0; i < ins->src_count; ++i) -+ { - shader_sm1_read_src_param(sm1, &p, &src_params[i]); -+ shader_sm1_scan_register(sm1, &src_params[i].reg, mask_from_swizzle(src_params[i].swizzle), false); -+ } - } - - if (sm1->abort) -@@ -852,10 +1182,9 @@ fail: - *ptr = sm1->end; - } - --static bool shader_sm1_is_end(struct vkd3d_shader_parser *parser) -+static bool shader_sm1_is_end(struct vkd3d_shader_sm1_parser *sm1) - { -- struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); -- const uint32_t **ptr = &parser->ptr; -+ const uint32_t **ptr = &sm1->ptr; - - shader_sm1_read_comment(sm1); - -@@ -938,17 +1267,35 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, - shader_desc = &sm1->p.shader_desc; - shader_desc->byte_code = code; - shader_desc->byte_code_size = code_size; -- sm1->p.ptr = sm1->start; -+ sm1->ptr = sm1->start; - - return VKD3D_OK; - } - -+static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1, -+ enum vkd3d_shader_d3dbc_constant_register set) -+{ -+ unsigned int j; -+ -+ /* Find the highest constant index which is not written by a DEF -+ * instruction. We can't (easily) use an FFZ function for this since it -+ * needs to be limited by the highest used register index. 
*/ -+ for (j = sm1->p.shader_desc.flat_constant_count[set].used; j > 0; --j) -+ { -+ if (!bitmap_is_set(sm1->constant_def_mask[set], j - 1)) -+ return j; -+ } -+ -+ return 0; -+} -+ - int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) - { - struct vkd3d_shader_instruction_array *instructions; - struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_sm1_parser *sm1; -+ unsigned int i; - int ret; - - if (!(sm1 = vkd3d_calloc(1, sizeof(*sm1)))) -@@ -965,7 +1312,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi - } - - instructions = &sm1->p.instructions; -- while (!shader_sm1_is_end(&sm1->p)) -+ while (!shader_sm1_is_end(sm1)) - { - if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) - { -@@ -975,7 +1322,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi - return VKD3D_ERROR_OUT_OF_MEMORY; - } - ins = &instructions->elements[instructions->count]; -- shader_sm1_read_instruction(&sm1->p, ins); -+ shader_sm1_read_instruction(sm1, ins); - - if (ins->handler_idx == VKD3DSIH_INVALID) - { -@@ -988,5 +1335,1133 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi - - *parser = &sm1->p; - -- return VKD3D_OK; -+ for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) -+ sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); -+ -+ return sm1->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; -+} -+ -+bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -+ bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) -+{ -+ unsigned int i; -+ -+ static const struct -+ { -+ const char *semantic; -+ bool output; -+ enum vkd3d_shader_type shader_type; -+ unsigned int major_version; -+ D3DSHADER_PARAM_REGISTER_TYPE type; -+ DWORD offset; -+ } -+ register_table[] = -+ { -+ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, -+ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, -+ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, -+ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, -+ {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, -+ {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, -+ -+ 
{"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, -+ }; -+ -+ for (i = 0; i < ARRAY_SIZE(register_table); ++i) -+ { -+ if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) -+ && output == register_table[i].output -+ && ctx->profile->type == register_table[i].shader_type -+ && ctx->profile->major_version == register_table[i].major_version) -+ { -+ *type = register_table[i].type; -+ if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) -+ *reg = register_table[i].offset; -+ else -+ *reg = semantic->index; -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) -+{ -+ static const struct -+ { -+ const char *name; -+ D3DDECLUSAGE usage; -+ } -+ semantics[] = -+ { -+ {"binormal", D3DDECLUSAGE_BINORMAL}, -+ {"blendindices", D3DDECLUSAGE_BLENDINDICES}, -+ {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, -+ {"color", D3DDECLUSAGE_COLOR}, -+ {"depth", D3DDECLUSAGE_DEPTH}, -+ {"fog", D3DDECLUSAGE_FOG}, -+ {"normal", D3DDECLUSAGE_NORMAL}, -+ {"position", D3DDECLUSAGE_POSITION}, -+ {"positiont", D3DDECLUSAGE_POSITIONT}, -+ {"psize", D3DDECLUSAGE_PSIZE}, -+ {"sample", D3DDECLUSAGE_SAMPLE}, -+ {"sv_depth", D3DDECLUSAGE_DEPTH}, -+ {"sv_position", D3DDECLUSAGE_POSITION}, -+ {"sv_target", D3DDECLUSAGE_COLOR}, -+ {"tangent", D3DDECLUSAGE_TANGENT}, -+ {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, -+ {"texcoord", D3DDECLUSAGE_TEXCOORD}, -+ }; -+ -+ unsigned int i; -+ -+ for (i = 0; i < ARRAY_SIZE(semantics); ++i) -+ { -+ if 
(!ascii_strcasecmp(semantic->name, semantics[i].name)) -+ { -+ *usage = semantics[i].usage; -+ *usage_idx = semantic->index; -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) -+{ -+ if (type == VKD3D_SHADER_TYPE_VERTEX) -+ return D3DVS_VERSION(major, minor); -+ else -+ return D3DPS_VERSION(major, minor); -+} -+ -+static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) -+{ -+ switch (type->class) -+ { -+ case HLSL_CLASS_ARRAY: -+ return sm1_class(type->e.array.type); -+ case HLSL_CLASS_MATRIX: -+ assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) -+ return D3DXPC_MATRIX_COLUMNS; -+ else -+ return D3DXPC_MATRIX_ROWS; -+ case HLSL_CLASS_OBJECT: -+ return D3DXPC_OBJECT; -+ case HLSL_CLASS_SCALAR: -+ return D3DXPC_SCALAR; -+ case HLSL_CLASS_STRUCT: -+ return D3DXPC_STRUCT; -+ case HLSL_CLASS_VECTOR: -+ return D3DXPC_VECTOR; -+ default: -+ ERR("Invalid class %#x.\n", type->class); -+ vkd3d_unreachable(); -+ } -+} -+ -+static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) -+{ -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_BOOL: -+ return D3DXPT_BOOL; -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ return D3DXPT_FLOAT; -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ return D3DXPT_INT; -+ case HLSL_TYPE_PIXELSHADER: -+ return D3DXPT_PIXELSHADER; -+ case HLSL_TYPE_SAMPLER: -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return D3DXPT_SAMPLER1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return D3DXPT_SAMPLER2D; -+ case HLSL_SAMPLER_DIM_3D: -+ return D3DXPT_SAMPLER3D; -+ case HLSL_SAMPLER_DIM_CUBE: -+ return D3DXPT_SAMPLERCUBE; -+ case HLSL_SAMPLER_DIM_GENERIC: -+ return D3DXPT_SAMPLER; -+ default: -+ ERR("Invalid dimension %#x.\n", type->sampler_dim); -+ vkd3d_unreachable(); -+ } -+ break; -+ case HLSL_TYPE_STRING: -+ return D3DXPT_STRING; -+ case HLSL_TYPE_TEXTURE: -+ 
switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return D3DXPT_TEXTURE1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return D3DXPT_TEXTURE2D; -+ case HLSL_SAMPLER_DIM_3D: -+ return D3DXPT_TEXTURE3D; -+ case HLSL_SAMPLER_DIM_CUBE: -+ return D3DXPT_TEXTURECUBE; -+ case HLSL_SAMPLER_DIM_GENERIC: -+ return D3DXPT_TEXTURE; -+ default: -+ ERR("Invalid dimension %#x.\n", type->sampler_dim); -+ vkd3d_unreachable(); -+ } -+ break; -+ case HLSL_TYPE_VERTEXSHADER: -+ return D3DXPT_VERTEXSHADER; -+ case HLSL_TYPE_VOID: -+ return D3DXPT_VOID; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) -+{ -+ const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); -+ unsigned int array_size = hlsl_get_multiarray_size(type); -+ unsigned int field_count = 0; -+ size_t fields_offset = 0; -+ size_t i; -+ -+ if (type->bytecode_offset) -+ return; -+ -+ if (array_type->class == HLSL_CLASS_STRUCT) -+ { -+ field_count = array_type->e.record.field_count; -+ -+ for (i = 0; i < field_count; ++i) -+ { -+ struct hlsl_struct_field *field = &array_type->e.record.fields[i]; -+ -+ field->name_bytecode_offset = put_string(buffer, field->name); -+ write_sm1_type(buffer, field->type, ctab_start); -+ } -+ -+ fields_offset = bytecode_align(buffer) - ctab_start; -+ -+ for (i = 0; i < field_count; ++i) -+ { -+ struct hlsl_struct_field *field = &array_type->e.record.fields[i]; -+ -+ put_u32(buffer, field->name_bytecode_offset - ctab_start); -+ put_u32(buffer, field->type->bytecode_offset - ctab_start); -+ } -+ } -+ -+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); -+ put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); -+ put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -+ put_u32(buffer, fields_offset); -+} -+ -+static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) -+{ -+ struct 
hlsl_ir_var *var; -+ -+ list_remove(&to_sort->extern_entry); -+ -+ LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) -+ { -+ if (strcmp(to_sort->name, var->name) < 0) -+ { -+ list_add_before(&var->extern_entry, &to_sort->extern_entry); -+ return; -+ } -+ } -+ -+ list_add_tail(sorted, &to_sort->extern_entry); -+} -+ -+static void sm1_sort_externs(struct hlsl_ctx *ctx) -+{ -+ struct list sorted = LIST_INIT(sorted); -+ struct hlsl_ir_var *var, *next; -+ -+ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_uniform) -+ sm1_sort_extern(&sorted, var); -+ } -+ list_move_tail(&ctx->extern_vars, &sorted); -+} -+ -+static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ struct hlsl_ir_function_decl *entry_func) -+{ -+ size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; -+ unsigned int uniform_count = 0; -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ unsigned int r; -+ -+ for (r = 0; r <= HLSL_REGSET_LAST; ++r) -+ { -+ if (var->semantic.name || !var->regs[r].allocated) -+ continue; -+ -+ ++uniform_count; -+ -+ if (var->is_param && var->is_uniform) -+ { -+ char *new_name; -+ -+ if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name))) -+ return; -+ vkd3d_free((char *)var->name); -+ var->name = new_name; -+ } -+ } -+ } -+ -+ sm1_sort_externs(ctx); -+ -+ size_offset = put_u32(buffer, 0); -+ ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); -+ -+ ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); -+ creator_offset = put_u32(buffer, 0); -+ put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); -+ put_u32(buffer, uniform_count); -+ put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ -+ put_u32(buffer, 0); /* FIXME: flags */ -+ put_u32(buffer, 0); /* 
FIXME: target string */ -+ -+ vars_start = bytecode_align(buffer); -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ unsigned int r; -+ -+ for (r = 0; r <= HLSL_REGSET_LAST; ++r) -+ { -+ if (var->semantic.name || !var->regs[r].allocated) -+ continue; -+ -+ put_u32(buffer, 0); /* name */ -+ if (r == HLSL_REGSET_NUMERIC) -+ { -+ put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); -+ put_u32(buffer, var->data_type->reg_size[r] / 4); -+ } -+ else -+ { -+ put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); -+ put_u32(buffer, var->bind_count[r]); -+ } -+ put_u32(buffer, 0); /* type */ -+ put_u32(buffer, 0); /* FIXME: default value */ -+ } -+ } -+ -+ uniform_count = 0; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ unsigned int r; -+ -+ for (r = 0; r <= HLSL_REGSET_LAST; ++r) -+ { -+ size_t var_offset, name_offset; -+ -+ if (var->semantic.name || !var->regs[r].allocated) -+ continue; -+ -+ var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); -+ -+ name_offset = put_string(buffer, var->name); -+ set_u32(buffer, var_offset, name_offset - ctab_start); -+ -+ write_sm1_type(buffer, var->data_type, ctab_start); -+ set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); -+ ++uniform_count; -+ } -+ } -+ -+ offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); -+ set_u32(buffer, creator_offset, offset - ctab_start); -+ -+ ctab_end = bytecode_align(buffer); -+ set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); -+} -+ -+static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) -+{ -+ return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) -+ | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); -+} -+ -+struct sm1_instruction -+{ -+ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; -+ -+ struct sm1_dst_register -+ { -+ 
D3DSHADER_PARAM_REGISTER_TYPE type; -+ D3DSHADER_PARAM_DSTMOD_TYPE mod; -+ unsigned int writemask; -+ uint32_t reg; -+ } dst; -+ -+ struct sm1_src_register -+ { -+ D3DSHADER_PARAM_REGISTER_TYPE type; -+ D3DSHADER_PARAM_SRCMOD_TYPE mod; -+ unsigned int swizzle; -+ uint32_t reg; -+ } srcs[3]; -+ unsigned int src_count; -+ -+ unsigned int has_dst; -+}; -+ -+static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) -+{ -+ assert(reg->writemask); -+ put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); -+} -+ -+static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, -+ const struct sm1_src_register *reg) -+{ -+ put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); -+} -+ -+static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct sm1_instruction *instr) -+{ -+ uint32_t token = instr->opcode; -+ unsigned int i; -+ -+ if (ctx->profile->major_version > 1) -+ token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; -+ put_u32(buffer, token); -+ -+ if (instr->has_dst) -+ write_sm1_dst_register(buffer, &instr->dst); -+ -+ for (i = 0; i < instr->src_count; ++i) -+ write_sm1_src_register(buffer, &instr->srcs[i]); -+}; -+ -+static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) -+{ -+ src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); -+} -+ -+static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, -+ const struct hlsl_reg *src3) -+{ -+ struct sm1_instruction instr = -+ { -+ .opcode = D3DSIO_DP2ADD, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.writemask = dst->writemask, -+ .dst.reg = dst->id, -+ .has_dst = 1, -+ -+ .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].swizzle = 
hlsl_swizzle_from_writemask(src1->writemask), -+ .srcs[0].reg = src1->id, -+ .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), -+ .srcs[1].reg = src2->id, -+ .srcs[2].type = D3DSPR_TEMP, -+ .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), -+ .srcs[2].reg = src3->id, -+ .src_count = 3, -+ }; -+ -+ write_sm1_instruction(ctx, buffer, &instr); -+} -+ -+static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -+ const struct hlsl_reg *src1, const struct hlsl_reg *src2) -+{ -+ struct sm1_instruction instr = -+ { -+ .opcode = opcode, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.writemask = dst->writemask, -+ .dst.reg = dst->id, -+ .has_dst = 1, -+ -+ .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), -+ .srcs[0].reg = src1->id, -+ .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), -+ .srcs[1].reg = src2->id, -+ .src_count = 2, -+ }; -+ -+ sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); -+ sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); -+ write_sm1_instruction(ctx, buffer, &instr); -+} -+ -+static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -+ const struct hlsl_reg *src1, const struct hlsl_reg *src2) -+{ -+ struct sm1_instruction instr = -+ { -+ .opcode = opcode, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.writemask = dst->writemask, -+ .dst.reg = dst->id, -+ .has_dst = 1, -+ -+ .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), -+ .srcs[0].reg = src1->id, -+ .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), -+ .srcs[1].reg = src2->id, -+ .src_count = 2, -+ }; -+ -+ write_sm1_instruction(ctx, 
buffer, &instr); -+} -+ -+static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -+ const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) -+{ -+ struct sm1_instruction instr = -+ { -+ .opcode = opcode, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.mod = dst_mod, -+ .dst.writemask = dst->writemask, -+ .dst.reg = dst->id, -+ .has_dst = 1, -+ -+ .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), -+ .srcs[0].reg = src->id, -+ .srcs[0].mod = src_mod, -+ .src_count = 1, -+ }; -+ -+ sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); -+ write_sm1_instruction(ctx, buffer, &instr); -+} -+ -+static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+{ -+ unsigned int i, x; -+ -+ for (i = 0; i < ctx->constant_defs.count; ++i) -+ { -+ const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; -+ uint32_t token = D3DSIO_DEF; -+ const struct sm1_dst_register reg = -+ { -+ .type = D3DSPR_CONST, -+ .writemask = VKD3DSP_WRITEMASK_ALL, -+ .reg = constant_reg->index, -+ }; -+ -+ if (ctx->profile->major_version > 1) -+ token |= 5 << D3DSI_INSTLENGTH_SHIFT; -+ put_u32(buffer, token); -+ -+ write_sm1_dst_register(buffer, ®); -+ for (x = 0; x < 4; ++x) -+ put_f32(buffer, constant_reg->value.f[x]); -+ } -+} -+ -+static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_ir_var *var, bool output) -+{ -+ struct sm1_dst_register reg = {0}; -+ uint32_t token, usage_idx; -+ D3DDECLUSAGE usage; -+ bool ret; -+ -+ if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) -+ { -+ usage = 0; -+ usage_idx = 0; -+ } -+ else -+ { -+ ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); -+ assert(ret); -+ reg.type = output ? 
D3DSPR_OUTPUT : D3DSPR_INPUT; -+ reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; -+ } -+ -+ token = D3DSIO_DCL; -+ if (ctx->profile->major_version > 1) -+ token |= 2 << D3DSI_INSTLENGTH_SHIFT; -+ put_u32(buffer, token); -+ -+ token = (1u << 31); -+ token |= usage << D3DSP_DCL_USAGE_SHIFT; -+ token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; -+ put_u32(buffer, token); -+ -+ reg.writemask = (1 << var->data_type->dimx) - 1; -+ write_sm1_dst_register(buffer, ®); -+} -+ -+static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+{ -+ bool write_in = false, write_out = false; -+ struct hlsl_ir_var *var; -+ -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) -+ write_in = true; -+ else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) -+ write_in = write_out = true; -+ else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) -+ write_in = true; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (write_in && var->is_input_semantic) -+ write_sm1_semantic_dcl(ctx, buffer, var, false); -+ if (write_out && var->is_output_semantic) -+ write_sm1_semantic_dcl(ctx, buffer, var, true); -+ } -+} -+ -+static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) -+{ -+ struct sm1_dst_register reg = {0}; -+ uint32_t token, res_type = 0; -+ -+ token = D3DSIO_DCL; -+ if (ctx->profile->major_version > 1) -+ token |= 2 << D3DSI_INSTLENGTH_SHIFT; -+ put_u32(buffer, token); -+ -+ switch (sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_2D: -+ res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D; -+ break; -+ -+ case HLSL_SAMPLER_DIM_CUBE: -+ res_type = VKD3D_SM1_RESOURCE_TEXTURE_CUBE; -+ break; -+ -+ case HLSL_SAMPLER_DIM_3D: -+ res_type = VKD3D_SM1_RESOURCE_TEXTURE_3D; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ break; -+ 
} -+ -+ token = (1u << 31); -+ token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; -+ put_u32(buffer, token); -+ -+ reg.type = D3DSPR_SAMPLER; -+ reg.writemask = VKD3DSP_WRITEMASK_ALL; -+ reg.reg = reg_id; -+ -+ write_sm1_dst_register(buffer, ®); -+} -+ -+static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+{ -+ enum hlsl_sampler_dim sampler_dim; -+ unsigned int i, count, reg_id; -+ struct hlsl_ir_var *var; -+ -+ if (ctx->profile->major_version < 2) -+ return; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) -+ continue; -+ -+ count = var->bind_count[HLSL_REGSET_SAMPLERS]; -+ -+ for (i = 0; i < count; ++i) -+ { -+ if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) -+ { -+ sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; -+ if (sampler_dim == HLSL_SAMPLER_DIM_GENERIC) -+ { -+ /* These can appear in sm4-style combined sample instructions. 
*/ -+ hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); -+ continue; -+ } -+ -+ reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; -+ write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); -+ } -+ } -+ } -+} -+ -+static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_ir_node *instr) -+{ -+ const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); -+ struct sm1_instruction sm1_instr = -+ { -+ .opcode = D3DSIO_MOV, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.reg = instr->reg.id, -+ .dst.writemask = instr->reg.writemask, -+ .has_dst = 1, -+ -+ .srcs[0].type = D3DSPR_CONST, -+ .srcs[0].reg = constant->reg.id, -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), -+ .src_count = 1, -+ }; -+ -+ assert(instr->reg.allocated); -+ assert(constant->reg.allocated); -+ sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -+ write_sm1_instruction(ctx, buffer, &sm1_instr); -+} -+ -+static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) -+{ -+ struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); -+ struct hlsl_ir_node *arg1 = expr->operands[0].node; -+ unsigned int i; -+ -+ for (i = 0; i < instr->data_type->dimx; ++i) -+ { -+ struct hlsl_reg src = arg1->reg, dst = instr->reg; -+ -+ src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); -+ dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); -+ write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); -+ } -+} -+ -+static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+{ -+ struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); -+ struct hlsl_ir_node *arg1 = expr->operands[0].node; -+ struct hlsl_ir_node *arg2 = expr->operands[1].node; -+ struct hlsl_ir_node *arg3 = expr->operands[2].node; -+ -+ 
assert(instr->reg.allocated); -+ -+ if (instr->data_type->base_type != HLSL_TYPE_FLOAT) -+ { -+ /* These need to be lowered. */ -+ hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); -+ return; -+ } -+ -+ switch (expr->op) -+ { -+ case HLSL_OP1_ABS: -+ write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); -+ break; -+ -+ case HLSL_OP1_DSX: -+ write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); -+ break; -+ -+ case HLSL_OP1_DSY: -+ write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); -+ break; -+ -+ case HLSL_OP1_EXP2: -+ write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); -+ break; -+ -+ case HLSL_OP1_LOG2: -+ write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); -+ break; -+ -+ case HLSL_OP1_NEG: -+ write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); -+ break; -+ -+ case HLSL_OP1_SAT: -+ write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); -+ break; -+ -+ case HLSL_OP1_RCP: -+ write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); -+ break; -+ -+ case HLSL_OP1_RSQ: -+ write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); -+ break; -+ -+ case HLSL_OP2_ADD: -+ write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); -+ break; -+ -+ case HLSL_OP2_MAX: -+ write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); -+ break; -+ -+ case HLSL_OP2_MIN: -+ write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); -+ break; -+ -+ case HLSL_OP2_MUL: -+ write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); -+ break; -+ -+ case HLSL_OP1_FRACT: -+ write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); -+ break; -+ -+ case HLSL_OP2_DOT: -+ switch (arg1->data_type->dimx) -+ { -+ case 4: -+ write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, 
&arg1->reg, &arg2->reg); -+ break; -+ -+ case 3: -+ write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ -+ case HLSL_OP3_DP2ADD: -+ write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -+ break; -+ -+ default: -+ hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); -+ break; -+ } -+} -+ -+static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+{ -+ const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -+ -+ switch (jump->type) -+ { -+ case HLSL_IR_JUMP_DISCARD_NEG: -+ { -+ struct hlsl_reg *reg = &jump->condition.node->reg; -+ -+ struct sm1_instruction instr = -+ { -+ .opcode = VKD3D_SM1_OP_TEXKILL, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.reg = reg->id, -+ .dst.writemask = reg->writemask, -+ .has_dst = 1, -+ }; -+ -+ write_sm1_instruction(ctx, buffer, &instr); -+ break; -+ } -+ -+ default: -+ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); -+ } -+} -+ -+static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+{ -+ const struct hlsl_ir_load *load = hlsl_ir_load(instr); -+ const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); -+ struct sm1_instruction sm1_instr = -+ { -+ .opcode = D3DSIO_MOV, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.reg = instr->reg.id, -+ .dst.writemask = instr->reg.writemask, -+ .has_dst = 1, -+ -+ .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].reg = reg.id, -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), -+ .src_count = 1, -+ }; -+ -+ assert(instr->reg.allocated); -+ -+ if (load->src.var->is_uniform) -+ { -+ assert(reg.allocated); -+ sm1_instr.srcs[0].type = D3DSPR_CONST; -+ } -+ else if (load->src.var->is_input_semantic) -+ { -+ if (!hlsl_sm1_register_from_semantic(ctx, 
&load->src.var->semantic, -+ false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) -+ { -+ assert(reg.allocated); -+ sm1_instr.srcs[0].type = D3DSPR_INPUT; -+ sm1_instr.srcs[0].reg = reg.id; -+ } -+ else -+ sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); -+ } -+ -+ sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -+ write_sm1_instruction(ctx, buffer, &sm1_instr); -+} -+ -+static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_ir_node *instr) -+{ -+ const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); -+ struct hlsl_ir_node *coords = load->coords.node; -+ unsigned int sampler_offset, reg_id; -+ struct sm1_instruction sm1_instr; -+ -+ sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); -+ reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; -+ -+ sm1_instr = (struct sm1_instruction) -+ { -+ .opcode = D3DSIO_TEX, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.reg = instr->reg.id, -+ .dst.writemask = instr->reg.writemask, -+ .has_dst = 1, -+ -+ .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].reg = coords->reg.id, -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), -+ -+ .srcs[1].type = D3DSPR_SAMPLER, -+ .srcs[1].reg = reg_id, -+ .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), -+ -+ .src_count = 2, -+ }; -+ -+ assert(instr->reg.allocated); -+ -+ write_sm1_instruction(ctx, buffer, &sm1_instr); -+} -+ -+static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_ir_node *instr) -+{ -+ const struct hlsl_ir_store *store = hlsl_ir_store(instr); -+ const struct hlsl_ir_node *rhs = store->rhs.node; -+ const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); -+ struct sm1_instruction sm1_instr = -+ { -+ .opcode = D3DSIO_MOV, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.reg = reg.id, -+ .dst.writemask = 
hlsl_combine_writemasks(reg.writemask, store->writemask), -+ .has_dst = 1, -+ -+ .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].reg = rhs->reg.id, -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), -+ .src_count = 1, -+ }; -+ -+ if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) -+ { -+ FIXME("Matrix writemasks need to be lowered.\n"); -+ return; -+ } -+ -+ if (store->lhs.var->is_output_semantic) -+ { -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) -+ { -+ sm1_instr.dst.type = D3DSPR_TEMP; -+ sm1_instr.dst.reg = 0; -+ } -+ else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, -+ true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) -+ { -+ assert(reg.allocated); -+ sm1_instr.dst.type = D3DSPR_OUTPUT; -+ sm1_instr.dst.reg = reg.id; -+ } -+ else -+ sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; -+ } -+ else -+ assert(reg.allocated); -+ -+ sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -+ write_sm1_instruction(ctx, buffer, &sm1_instr); -+} -+ -+static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_ir_node *instr) -+{ -+ const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); -+ const struct hlsl_ir_node *val = swizzle->val.node; -+ struct sm1_instruction sm1_instr = -+ { -+ .opcode = D3DSIO_MOV, -+ -+ .dst.type = D3DSPR_TEMP, -+ .dst.reg = instr->reg.id, -+ .dst.writemask = instr->reg.writemask, -+ .has_dst = 1, -+ -+ .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].reg = val->reg.id, -+ .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), -+ swizzle->swizzle, instr->data_type->dimx), -+ .src_count = 1, -+ }; -+ -+ assert(instr->reg.allocated); -+ assert(val->reg.allocated); -+ sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -+ write_sm1_instruction(ctx, buffer, &sm1_instr); -+} -+ -+static void write_sm1_instructions(struct hlsl_ctx *ctx, 
struct vkd3d_bytecode_buffer *buffer, -+ const struct hlsl_ir_function_decl *entry_func) -+{ -+ const struct hlsl_ir_node *instr; -+ -+ LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) -+ { -+ if (instr->data_type) -+ { -+ if (instr->data_type->class == HLSL_CLASS_MATRIX) -+ { -+ /* These need to be lowered. */ -+ hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); -+ continue; -+ } -+ else if (instr->data_type->class == HLSL_CLASS_OBJECT) -+ { -+ hlsl_fixme(ctx, &instr->loc, "Object copy."); -+ break; -+ } -+ -+ assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); -+ } -+ -+ switch (instr->type) -+ { -+ case HLSL_IR_CALL: -+ vkd3d_unreachable(); -+ -+ case HLSL_IR_CONSTANT: -+ write_sm1_constant(ctx, buffer, instr); -+ break; -+ -+ case HLSL_IR_EXPR: -+ write_sm1_expr(ctx, buffer, instr); -+ break; -+ -+ case HLSL_IR_JUMP: -+ write_sm1_jump(ctx, buffer, instr); -+ break; -+ -+ case HLSL_IR_LOAD: -+ write_sm1_load(ctx, buffer, instr); -+ break; -+ -+ case HLSL_IR_RESOURCE_LOAD: -+ write_sm1_resource_load(ctx, buffer, instr); -+ break; -+ -+ case HLSL_IR_STORE: -+ write_sm1_store(ctx, buffer, instr); -+ break; -+ -+ case HLSL_IR_SWIZZLE: -+ write_sm1_swizzle(ctx, buffer, instr); -+ break; -+ -+ default: -+ hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); -+ } -+ } -+} -+ -+int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -+{ -+ struct vkd3d_bytecode_buffer buffer = {0}; -+ -+ put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); -+ -+ write_sm1_uniforms(ctx, &buffer, entry_func); -+ -+ write_sm1_constant_defs(ctx, &buffer); -+ write_sm1_semantic_dcls(ctx, &buffer); -+ write_sm1_sampler_dcls(ctx, &buffer); -+ write_sm1_instructions(ctx, &buffer, entry_func); -+ -+ put_u32(&buffer, D3DSIO_END); -+ -+ if (buffer.status) -+ 
ctx->result = buffer.status; -+ -+ if (!ctx->result) -+ { -+ out->code = buffer.data; -+ out->size = buffer.size; -+ } -+ else -+ { -+ vkd3d_free(buffer.data); -+ } -+ return ctx->result; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index d99ea2e36b6..1cb00688c76 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -@@ -19,1680 +19,74 @@ - */ - - #include "vkd3d_shader_private.h" --#include "sm4.h" - --#define SM4_MAX_SRC_COUNT 6 --#define SM4_MAX_DST_COUNT 2 -- --STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); -- --void dxbc_writer_init(struct dxbc_writer *dxbc) --{ -- memset(dxbc, 0, sizeof(*dxbc)); --} -- --void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) --{ -- struct vkd3d_shader_dxbc_section_desc *section; -- -- assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); -- -- section = &dxbc->sections[dxbc->section_count++]; -- section->tag = tag; -- section->data.code = data; -- section->data.size = size; --} -- --int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, -- struct vkd3d_shader_code *dxbc, char **messages) --{ -- size_t size_position, offsets_position, checksum_position, i; -- struct vkd3d_bytecode_buffer buffer = {0}; -- uint32_t checksum[4]; -- -- TRACE("section_count %zu, sections %p, dxbc %p, messages %p.\n", section_count, sections, dxbc, messages); -- -- if (messages) -- *messages = NULL; -- -- put_u32(&buffer, TAG_DXBC); -- -- checksum_position = bytecode_get_size(&buffer); -- for (i = 0; i < 4; ++i) -- put_u32(&buffer, 0); -- -- put_u32(&buffer, 1); /* version */ -- size_position = put_u32(&buffer, 0); -- put_u32(&buffer, section_count); -- -- offsets_position = bytecode_get_size(&buffer); -- for (i = 0; i < section_count; ++i) -- put_u32(&buffer, 0); -- -- for (i = 0; i < section_count; ++i) -- { -- set_u32(&buffer, offsets_position + i * 
sizeof(uint32_t), bytecode_get_size(&buffer)); -- put_u32(&buffer, sections[i].tag); -- put_u32(&buffer, sections[i].data.size); -- bytecode_put_bytes(&buffer, sections[i].data.code, sections[i].data.size); -- } -- set_u32(&buffer, size_position, bytecode_get_size(&buffer)); -- -- vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); -- for (i = 0; i < 4; ++i) -- set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); -- -- if (!buffer.status) -- { -- dxbc->code = buffer.data; -- dxbc->size = buffer.size; -- } -- return buffer.status; --} -- --int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) --{ -- return vkd3d_shader_serialize_dxbc(dxbc->section_count, dxbc->sections, out, NULL); --} -- --struct vkd3d_shader_src_param_entry --{ -- struct list entry; -- struct vkd3d_shader_src_param param; --}; -- --struct vkd3d_shader_sm4_parser --{ -- const uint32_t *start, *end; -- -- unsigned int output_map[MAX_REG_OUTPUT]; -- -- struct vkd3d_shader_parser p; --}; -- --struct vkd3d_sm4_opcode_info --{ -- enum vkd3d_sm4_opcode opcode; -- enum vkd3d_shader_opcode handler_idx; -- char dst_info[SM4_MAX_DST_COUNT]; -- char src_info[SM4_MAX_SRC_COUNT]; -- void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -- const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); --}; -- --static const enum vkd3d_primitive_type output_primitive_type_table[] = --{ -- /* UNKNOWN */ VKD3D_PT_UNDEFINED, -- /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, -- /* UNKNOWN */ VKD3D_PT_UNDEFINED, -- /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, -- /* UNKNOWN */ VKD3D_PT_UNDEFINED, -- /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ VKD3D_PT_TRIANGLESTRIP, --}; -- --static const enum vkd3d_primitive_type input_primitive_type_table[] = --{ -- /* UNKNOWN */ VKD3D_PT_UNDEFINED, -- /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, -- /* VKD3D_SM4_INPUT_PT_LINE */ 
VKD3D_PT_LINELIST, -- /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, -- /* UNKNOWN */ VKD3D_PT_UNDEFINED, -- /* UNKNOWN */ VKD3D_PT_UNDEFINED, -- /* VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, -- /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, --}; -- --static const enum vkd3d_shader_resource_type resource_type_table[] = --{ -- /* 0 */ VKD3D_SHADER_RESOURCE_NONE, -- /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, -- /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, -- /* VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, -- /* VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, -- /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, -- /* VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, -- /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, -- /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, -- /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, -- /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, -- /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, -- /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, --}; -- --static const enum vkd3d_data_type data_type_table[] = --{ -- /* 0 */ VKD3D_DATA_FLOAT, -- /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, -- /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, -- /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, -- /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, -- /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, -- /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, -- /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, -- /* VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, -- /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, --}; -- --static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) --{ -- return 
CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); --} -- --static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) --{ -- const struct vkd3d_shader_version *version = &sm4->p.shader_version; -- -- return version->major >= 5 && version->minor >= 1; --} -- --static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, -- const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); --static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, -- const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param); -- --static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, -- const uint32_t **ptr, const uint32_t *end, unsigned int *register_space) --{ -- *register_space = 0; -- -- if (!shader_is_sm_5_1(priv)) -- return true; -- -- if (*ptr >= end) -- { -- WARN("Invalid ptr %p >= end %p.\n", *ptr, end); -- return false; -- } -- -- *register_space = *(*ptr)++; -- return true; --} -- --static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, -- (struct vkd3d_shader_src_param *)&ins->src[0]); -- ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? 
-- VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; --} -- --static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -- const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- struct vkd3d_shader_immediate_constant_buffer *icb; -- enum vkd3d_sm4_shader_data_type type; -- unsigned int icb_size; -- -- type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; -- if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) -- { -- FIXME("Ignoring shader data type %#x.\n", type); -- ins->handler_idx = VKD3DSIH_NOP; -- return; -- } -- -- ++tokens; -- icb_size = token_count - 1; -- if (icb_size % 4) -- { -- FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); -- ins->handler_idx = VKD3DSIH_INVALID; -- return; -- } -- -- if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) -- { -- ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); -- vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- ins->handler_idx = VKD3DSIH_INVALID; -- return; -- } -- icb->vec4_count = icb_size / 4; -- memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); -- shader_instruction_array_add_icb(&priv->p.instructions, icb); -- ins->declaration.icb = icb; --} -- --static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, -- const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) --{ -- range->first = reg->idx[1].offset; -- range->last = reg->idx[shader_is_sm_5_1(sm4) ? 
2 : 1].offset; -- if (range->last < range->first) -- { -- FIXME("Invalid register range [%u:%u].\n", range->first, range->last); -- vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, -- "Last register %u must not be less than first register %u in range.\n", range->last, range->first); -- } --} -- --static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; -- enum vkd3d_sm4_resource_type resource_type; -- const uint32_t *end = &tokens[token_count]; -- enum vkd3d_sm4_data_type data_type; -- enum vkd3d_data_type reg_data_type; -- DWORD components; -- unsigned int i; -- -- resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; -- if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) -- { -- FIXME("Unhandled resource type %#x.\n", resource_type); -- semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; -- } -- else -- { -- semantic->resource_type = resource_type_table[resource_type]; -- } -- -- if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS -- || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) -- { -- semantic->sample_count = (opcode_token & VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK) >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; -- } -- -- reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? 
VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; -- shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); -- shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); -- -- components = *tokens++; -- for (i = 0; i < VKD3D_VEC4_SIZE; i++) -- { -- data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); -- -- if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) -- { -- FIXME("Unhandled data type %#x.\n", data_type); -- semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; -- } -- else -- { -- semantic->resource_data_type[i] = data_type_table[data_type]; -- } -- } -- -- if (reg_data_type == VKD3D_DATA_UAV) -- ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; -- -- shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); --} -- --static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- const uint32_t *end = &tokens[token_count]; -- -- shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); -- shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); -- if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) -- ins->flags |= VKD3DSI_INDEXED_DYNAMIC; -- -- ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; -- ins->declaration.cb.range.space = 0; -- -- if (shader_is_sm_5_1(priv)) -- { -- if (tokens >= end) -- { -- FIXME("Invalid ptr %p >= end %p.\n", tokens, end); -- return; -- } -- -- ins->declaration.cb.size = *tokens++; -- shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); -- } --} -- --static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -- const uint32_t *tokens, unsigned int 
token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- const uint32_t *end = &tokens[token_count]; -- -- ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; -- if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) -- FIXME("Unhandled sampler mode %#x.\n", ins->flags); -- shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); -- shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); -- shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); --} -- --static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, -- &ins->declaration.index_range.dst); -- ins->declaration.index_range.register_count = *tokens; --} -- --static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- enum vkd3d_sm4_output_primitive_type primitive_type; -- -- primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; -- if (primitive_type >= ARRAY_SIZE(output_primitive_type_table)) -- ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; -- else -- ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type]; -- -- if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) -- FIXME("Unhandled output primitive type %#x.\n", primitive_type); --} -- --static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser 
*priv) --{ -- enum vkd3d_sm4_input_primitive_type primitive_type; -- -- primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; -- if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) -- { -- ins->declaration.primitive_type.type = VKD3D_PT_PATCH; -- ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; -- } -- else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) -- { -- ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; -- } -- else -- { -- ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type]; -- } -- -- if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) -- FIXME("Unhandled input primitive type %#x.\n", primitive_type); --} -- --static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->declaration.count = *tokens; --} -- --static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); --} -- --static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, -- &ins->declaration.register_semantic.reg); -- ins->declaration.register_semantic.sysval_semantic = *tokens; --} -- --static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const 
uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; -- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); --} -- --static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; -- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, -- &ins->declaration.register_semantic.reg); -- ins->declaration.register_semantic.sysval_semantic = *tokens; --} -- --static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->declaration.indexable_temp.register_idx = *tokens++; -- ins->declaration.indexable_temp.register_size = *tokens++; -- ins->declaration.indexable_temp.component_count = *tokens; --} -- --static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; --} -- --static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -- const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- struct vkd3d_shader_src_param *src_params = (struct vkd3d_shader_src_param *)ins->src; -- src_params[0].reg.u.fp_body_idx = *tokens++; -- shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], 
VKD3D_DATA_OPAQUE, &src_params[0]); --} -- --static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->declaration.index = *tokens; --} -- --static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->declaration.index = *tokens++; -- FIXME("Ignoring set of function bodies (count %u).\n", *tokens); --} -- --static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->declaration.fp.index = *tokens++; -- ins->declaration.fp.body_count = *tokens++; -- ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT; -- ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK; -- FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count); --} -- --static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) -- >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; --} -- --static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) -- >> VKD3D_SM5_TESSELLATOR_SHIFT; --} -- --static void shader_sm5_read_dcl_tessellator_partitioning(struct 
vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) -- >> VKD3D_SM5_TESSELLATOR_SHIFT; --} -- --static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) -- >> VKD3D_SM5_TESSELLATOR_SHIFT; --} -- --static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->declaration.max_tessellation_factor = *(float *)tokens; --} -- --static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->declaration.thread_group_size.x = *tokens++; -- ins->declaration.thread_group_size.y = *tokens++; -- ins->declaration.thread_group_size.z = *tokens++; --} -- --static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -- const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; -- const uint32_t *end = &tokens[token_count]; -- -- shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); -- shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); -- ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; -- 
shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); --} -- --static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; -- const uint32_t *end = &tokens[token_count]; -- -- shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); -- shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); -- ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; -- resource->byte_stride = *tokens++; -- if (resource->byte_stride % 4) -- FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); -- shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); --} -- --static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg); -- ins->declaration.tgsm_raw.byte_count = *tokens; -- if (ins->declaration.tgsm_raw.byte_count % 4) -- FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); --} -- --static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, -- &ins->declaration.tgsm_structured.reg); -- ins->declaration.tgsm_structured.byte_stride = *tokens++; -- ins->declaration.tgsm_structured.structure_count = *tokens; -- if 
(ins->declaration.tgsm_structured.byte_stride % 4) -- FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); --} -- --static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; -- const uint32_t *end = &tokens[token_count]; -- -- shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); -- shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); -- resource->byte_stride = *tokens++; -- if (resource->byte_stride % 4) -- FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); -- shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); --} -- --static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; -- const uint32_t *end = &tokens[token_count]; -- -- shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); -- shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); -- shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); --} -- --static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -- const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) --{ -- ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT; --} -- --/* -- * d -> VKD3D_DATA_DOUBLE -- * f -> VKD3D_DATA_FLOAT -- * i -> 
VKD3D_DATA_INT -- * u -> VKD3D_DATA_UINT -- * O -> VKD3D_DATA_OPAQUE -- * R -> VKD3D_DATA_RESOURCE -- * S -> VKD3D_DATA_SAMPLER -- * U -> VKD3D_DATA_UAV -- */ --static const struct vkd3d_sm4_opcode_info opcode_table[] = --{ -- {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"}, -- {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, -- {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, -- {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", -- shader_sm4_read_conditional_op}, -- {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, -- {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, -- {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", -- shader_sm4_read_conditional_op}, -- {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, -- {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, -- {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, -- {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, -- {VKD3D_SM4_OP_DISCARD, VKD3DSIH_TEXKILL, "", "u", -- shader_sm4_read_conditional_op}, -- {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, -- {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, -- {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, -- {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"}, -- {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""}, -- {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""}, -- {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""}, -- {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""}, -- {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""}, -- {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"}, -- {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"}, -- {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"}, -- {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"}, -- {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"}, -- {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", "ff"}, -- {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, -- {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", -- shader_sm4_read_conditional_op}, -- {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, -- {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, -- {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, -- {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"}, 
-- {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"}, -- {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN, "i", "ii"}, -- {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, -- {VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, -- {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, -- {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, -- {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, -- {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, -- {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, -- {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, -- {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, -- {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, -- {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, -- {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, -- {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, -- {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, -- {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, -- {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", -- shader_sm4_read_shader_data}, -- {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, -- {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, -- {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, -- {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, -- {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, -- {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, -- {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, -- {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, -- {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, -- {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", -- shader_sm4_read_conditional_op}, -- {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, -- {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, -- {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, -- {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, -- {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, -- {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, -- {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, -- {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, -- {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, -- 
{VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, -- {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, -- {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, -- {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, -- {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, -- {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, -- {VKD3D_SM4_OP_ULT, VKD3DSIH_ULT, "u", "uu"}, -- {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, -- {VKD3D_SM4_OP_UMUL, VKD3DSIH_UMUL, "uu", "uu"}, -- {VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, -- {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, -- {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, -- {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, -- {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, -- {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "", "", -- shader_sm4_read_dcl_resource}, -- {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", -- shader_sm4_read_dcl_constant_buffer}, -- {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", -- shader_sm4_read_dcl_sampler}, -- {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", -- shader_sm4_read_dcl_index_range}, -- {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", -- shader_sm4_read_dcl_output_topology}, -- {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", -- shader_sm4_read_dcl_input_primitive}, -- {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", -- shader_sm4_read_declaration_count}, -- {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", -- shader_sm4_read_declaration_dst}, -- {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", -- shader_sm4_read_declaration_register_semantic}, -- {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", -- shader_sm4_read_declaration_register_semantic}, -- {VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", -- shader_sm4_read_dcl_input_ps}, -- {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", -- 
shader_sm4_read_declaration_register_semantic}, -- {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, VKD3DSIH_DCL_INPUT_PS_SIV, "", "", -- shader_sm4_read_dcl_input_ps_siv}, -- {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", -- shader_sm4_read_declaration_dst}, -- {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", -- shader_sm4_read_declaration_register_semantic}, -- {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", -- shader_sm4_read_declaration_count}, -- {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", -- shader_sm4_read_dcl_indexable_temp}, -- {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", -- shader_sm4_read_dcl_global_flags}, -- {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, -- {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, -- {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, -- {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, -- {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, -- {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, -- {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, -- {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, -- {VKD3D_SM5_OP_EMIT_STREAM, VKD3DSIH_EMIT_STREAM, "", "f"}, -- {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, -- {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", -- shader_sm5_read_fcall}, -- {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, -- {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, -- {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, -- {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, -- {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, -- {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, -- {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, -- {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, -- {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, -- {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, -- 
{VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, -- {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, -- {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, -- {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, -- {VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, -- {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, -- {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, -- {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, -- {VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, -- {VKD3D_SM5_OP_SWAPC, VKD3DSIH_SWAPC, "ff", "uff"}, -- {VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, -- {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", -- shader_sm5_read_dcl_function_body}, -- {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", -- shader_sm5_read_dcl_function_table}, -- {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", -- shader_sm5_read_dcl_interface}, -- {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", -- shader_sm5_read_control_point_count}, -- {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", -- shader_sm5_read_control_point_count}, -- {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", -- shader_sm5_read_dcl_tessellator_domain}, -- {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", -- shader_sm5_read_dcl_tessellator_partitioning}, -- {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", -- shader_sm5_read_dcl_tessellator_output_primitive}, -- {VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", -- shader_sm5_read_dcl_hs_max_tessfactor}, -- {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", -- shader_sm4_read_declaration_count}, -- {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, 
VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", -- shader_sm4_read_declaration_count}, -- {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", -- shader_sm5_read_dcl_thread_group}, -- {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", -- shader_sm4_read_dcl_resource}, -- {VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", -- shader_sm5_read_dcl_uav_raw}, -- {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", -- shader_sm5_read_dcl_uav_structured}, -- {VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", -- shader_sm5_read_dcl_tgsm_raw}, -- {VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", -- shader_sm5_read_dcl_tgsm_structured}, -- {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", -- shader_sm5_read_dcl_resource_raw}, -- {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", -- shader_sm5_read_dcl_resource_structured}, -- {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, -- {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, -- {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, -- {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, -- {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, -- {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, -- {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, -- {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, -- {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, -- {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, -- {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, -- {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, -- {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, -- {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, -- {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", 
"U"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, -- {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, -- {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", -- shader_sm5_read_sync}, -- {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, -- {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, -- {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, -- {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, -- {VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, -- {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, -- {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, -- {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, -- {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, -- {VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, -- {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, -- {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, -- {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, -- {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, -- {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3DSIH_DCL_GS_INSTANCES, "", "", -- shader_sm4_read_declaration_count}, -- {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, -- {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, -- {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, -- {VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"}, -- {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, -- 
{VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, -- {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, -- {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, -- {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, -- {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, -- {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, -- {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, -- {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, -- {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, -- {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, -- {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, -- {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, -- {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, -- {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, -- {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, -- {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, -- {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, -- {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, -- {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, --}; -- --static const enum vkd3d_shader_register_type register_type_table[] = --{ -- /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, -- /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, -- /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, -- /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, -- /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, -- /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, -- /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, -- /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, -- /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, -- /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, -- /* UNKNOWN */ ~0u, -- /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, -- /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, -- /* VKD3D_SM4_RT_NULL */ 
VKD3DSPR_NULL, -- /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, -- /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, -- /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, -- /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, -- /* UNKNOWN */ ~0u, -- /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, -- /* UNKNOWN */ ~0u, -- /* UNKNOWN */ ~0u, -- /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, -- /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, -- /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, -- /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, -- /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, -- /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, -- /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, -- /* UNKNOWN */ ~0u, -- /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, -- /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, -- /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, -- /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, -- /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, -- /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, -- /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, -- /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, -- /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, -- /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, -- /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, -- /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, --}; -- --static const enum vkd3d_shader_register_precision register_precision_table[] = --{ -- /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, -- /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, -- /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, -- /* UNKNOWN */ VKD3D_SHADER_REGISTER_PRECISION_INVALID, -- /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ 
VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, -- /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, --}; -- --static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) --{ -- unsigned int i; -- -- for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) -- { -- if (opcode == opcode_table[i].opcode) return &opcode_table[i]; -- } -- -- return NULL; --} -- --static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) --{ -- switch (sm4->p.shader_version.type) -- { -- case VKD3D_SHADER_TYPE_PIXEL: -- if (reg->type == VKD3DSPR_OUTPUT) -- { -- unsigned int reg_idx = reg->idx[0].offset; -- -- if (reg_idx >= ARRAY_SIZE(sm4->output_map)) -- { -- ERR("Invalid output index %u.\n", reg_idx); -- break; -- } -- -- reg->type = VKD3DSPR_COLOROUT; -- reg->idx[0].offset = sm4->output_map[reg_idx]; -- } -- break; -- -- default: -- break; -- } --} -- --static enum vkd3d_data_type map_data_type(char t) --{ -- switch (t) -- { -- case 'd': -- return VKD3D_DATA_DOUBLE; -- case 'f': -- return VKD3D_DATA_FLOAT; -- case 'i': -- return VKD3D_DATA_INT; -- case 'u': -- return VKD3D_DATA_UINT; -- case 'O': -- return VKD3D_DATA_OPAQUE; -- case 'R': -- return VKD3D_DATA_RESOURCE; -- case 'S': -- return VKD3D_DATA_SAMPLER; -- case 'U': -- return VKD3D_DATA_UAV; -- default: -- ERR("Invalid data type '%c'.\n", t); -- return VKD3D_DATA_FLOAT; -- } --} -- --static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) --{ -- struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); -- -- shader_instruction_array_destroy(&parser->instructions); -- free_shader_desc(&parser->shader_desc); -- vkd3d_free(sm4); --} -- --static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, -- const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) --{ -- if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) -- { -- 
struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&priv->p, 1); -- -- if (!(reg_idx->rel_addr = rel_addr)) -- { -- ERR("Failed to get src param for relative addressing.\n"); -- return false; -- } -- -- if (addressing & VKD3D_SM4_ADDRESSING_OFFSET) -- reg_idx->offset = *(*ptr)++; -- else -- reg_idx->offset = 0; -- shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr); -- } -- else -- { -- reg_idx->rel_addr = NULL; -- reg_idx->offset = *(*ptr)++; -- } -- -- return true; --} -- --static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_type) --{ -- switch (register_type) -- { -- case VKD3D_SM4_RT_SAMPLER: -- case VKD3D_SM4_RT_RESOURCE: -- case VKD3D_SM4_RT_CONSTBUFFER: -- case VKD3D_SM5_RT_UAV: -- return true; -- -- default: -- return false; -- } --} -- --static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, -- enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) --{ -- enum vkd3d_sm4_register_precision precision; -- enum vkd3d_sm4_register_type register_type; -- enum vkd3d_sm4_extended_operand_type type; -- enum vkd3d_sm4_register_modifier m; -- uint32_t token, order, extended; -- -- if (*ptr >= end) -- { -- WARN("Invalid ptr %p >= end %p.\n", *ptr, end); -- return false; -- } -- token = *(*ptr)++; -- -- register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; -- if (register_type >= ARRAY_SIZE(register_type_table) -- || register_type_table[register_type] == VKD3DSPR_INVALID) -- { -- FIXME("Unhandled register type %#x.\n", register_type); -- param->type = VKD3DSPR_TEMP; -- } -- else -- { -- param->type = register_type_table[register_type]; -- } -- param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; -- param->non_uniform = false; -- param->data_type = data_type; -- -- *modifier = VKD3DSPSM_NONE; -- if (token & VKD3D_SM4_EXTENDED_OPERAND) -- { -- if (*ptr >= 
end) -- { -- WARN("Invalid ptr %p >= end %p.\n", *ptr, end); -- return false; -- } -- extended = *(*ptr)++; -- -- if (extended & VKD3D_SM4_EXTENDED_OPERAND) -- { -- FIXME("Skipping second-order extended operand.\n"); -- *ptr += *ptr < end; -- } -- -- type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK; -- if (type == VKD3D_SM4_EXTENDED_OPERAND_MODIFIER) -- { -- m = (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT; -- switch (m) -- { -- case VKD3D_SM4_REGISTER_MODIFIER_NEGATE: -- *modifier = VKD3DSPSM_NEG; -- break; -- -- case VKD3D_SM4_REGISTER_MODIFIER_ABS: -- *modifier = VKD3DSPSM_ABS; -- break; -- -- case VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE: -- *modifier = VKD3DSPSM_ABSNEG; -- break; -- -- default: -- FIXME("Unhandled register modifier %#x.\n", m); -- /* fall-through */ -- case VKD3D_SM4_REGISTER_MODIFIER_NONE: -- break; -- } -- -- precision = (extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT; -- if (precision >= ARRAY_SIZE(register_precision_table) -- || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID) -- { -- FIXME("Unhandled register precision %#x.\n", precision); -- param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID; -- } -- else -- { -- param->precision = register_precision_table[precision]; -- } -- -- if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK) -- param->non_uniform = true; -- -- extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK -- | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK -- | VKD3D_SM4_EXTENDED_OPERAND); -- if (extended) -- FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended); -- } -- else if (type) -- { -- FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type); -- } -- } -- -- order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT; -- -- if (order < 1) -- { -- param->idx[0].offset = ~0u; -- 
param->idx[0].rel_addr = NULL; -- } -- else -- { -- DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> VKD3D_SM4_ADDRESSING_SHIFT0; -- if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[0]))) -- { -- ERR("Failed to read register index.\n"); -- return false; -- } -- } -- -- if (order < 2) -- { -- param->idx[1].offset = ~0u; -- param->idx[1].rel_addr = NULL; -- } -- else -- { -- DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1; -- if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[1]))) -- { -- ERR("Failed to read register index.\n"); -- return false; -- } -- } -- -- if (order < 3) -- { -- param->idx[2].offset = ~0u; -- param->idx[2].rel_addr = NULL; -- } -- else -- { -- DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2; -- if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[2]))) -- { -- ERR("Failed to read register index.\n"); -- return false; -- } -- } -- -- if (order > 3) -- { -- WARN("Unhandled order %u.\n", order); -- return false; -- } -- -- if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64) -- { -- enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT; -- unsigned int dword_count; -- -- switch (dimension) -- { -- case VKD3D_SM4_DIMENSION_SCALAR: -- param->immconst_type = VKD3D_IMMCONST_SCALAR; -- dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64); -- if (end - *ptr < dword_count) -- { -- WARN("Invalid ptr %p, end %p.\n", *ptr, end); -- return false; -- } -- memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD)); -- *ptr += dword_count; -- break; -- -- case VKD3D_SM4_DIMENSION_VEC4: -- param->immconst_type = VKD3D_IMMCONST_VEC4; -- if (end - *ptr < VKD3D_VEC4_SIZE) -- { -- WARN("Invalid ptr %p, end %p.\n", *ptr, end); -- return false; -- } -- memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD)); -- *ptr += 4; -- 
break; -- -- default: -- FIXME("Unhandled dimension %#x.\n", dimension); -- break; -- } -- } -- else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type)) -- { -- /* SM5.1 places a symbol identifier in idx[0] and moves -- * other values up one slot. Normalize to SM5.1. */ -- param->idx[2] = param->idx[1]; -- param->idx[1] = param->idx[0]; -- } -- -- map_register(priv, param); -- -- return true; --} -- --static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) --{ -- switch (reg->type) -- { -- case VKD3DSPR_COVERAGE: -- case VKD3DSPR_DEPTHOUT: -- case VKD3DSPR_DEPTHOUTGE: -- case VKD3DSPR_DEPTHOUTLE: -- case VKD3DSPR_GSINSTID: -- case VKD3DSPR_LOCALTHREADINDEX: -- case VKD3DSPR_OUTPOINTID: -- case VKD3DSPR_PRIMID: -- case VKD3DSPR_SAMPLEMASK: -- case VKD3DSPR_OUTSTENCILREF: -- return true; -- default: -- return false; -- } --} -- --static uint32_t swizzle_from_sm4(uint32_t s) --{ -- return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3); --} -- --static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, -- const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) -+void dxbc_writer_init(struct dxbc_writer *dxbc) - { -- DWORD token; -- -- if (*ptr >= end) -- { -- WARN("Invalid ptr %p >= end %p.\n", *ptr, end); -- return false; -- } -- token = **ptr; -- -- if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers)) -- { -- ERR("Failed to read parameter.\n"); -- return false; -- } -- -- if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) -- { -- src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -- } -- else -- { -- enum vkd3d_sm4_swizzle_type swizzle_type = -- (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -- -- switch (swizzle_type) -- { -- case VKD3D_SM4_SWIZZLE_NONE: -- if 
(shader_sm4_is_scalar_register(&src_param->reg)) -- src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -- else -- src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -- break; -- -- case VKD3D_SM4_SWIZZLE_SCALAR: -- src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; -- src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; -- break; -- -- case VKD3D_SM4_SWIZZLE_VEC4: -- src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); -- break; -- -- default: -- FIXME("Unhandled swizzle type %#x.\n", swizzle_type); -- break; -- } -- } -- -- return true; -+ memset(dxbc, 0, sizeof(*dxbc)); - } - --static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, -- const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) -+void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) - { -- enum vkd3d_shader_src_modifier modifier; -- DWORD token; -- -- if (*ptr >= end) -- { -- WARN("Invalid ptr %p >= end %p.\n", *ptr, end); -- return false; -- } -- token = **ptr; -- -- if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier)) -- { -- ERR("Failed to read parameter.\n"); -- return false; -- } -- -- if (modifier != VKD3DSPSM_NONE) -- { -- ERR("Invalid source modifier %#x on destination register.\n", modifier); -- return false; -- } -+ struct vkd3d_shader_dxbc_section_desc *section; - -- dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; -- if (data_type == VKD3D_DATA_DOUBLE) -- dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); -- /* Scalar registers are declared with no write mask in shader bytecode. 
*/ -- if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) -- dst_param->write_mask = VKD3DSP_WRITEMASK_0; -- dst_param->modifiers = 0; -- dst_param->shift = 0; -+ assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); - -- return true; -+ section = &dxbc->sections[dxbc->section_count++]; -+ section->tag = tag; -+ section->data.code = data; -+ section->data.size = size; - } - --static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins) -+int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, -+ struct vkd3d_shader_code *dxbc, char **messages) - { -- enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK; -- -- switch (modifier_type) -- { -- case VKD3D_SM4_MODIFIER_AOFFIMMI: -- { -- static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER -- | VKD3D_SM4_MODIFIER_MASK -- | VKD3D_SM4_AOFFIMMI_U_MASK -- | VKD3D_SM4_AOFFIMMI_V_MASK -- | VKD3D_SM4_AOFFIMMI_W_MASK; -- -- /* Bit fields are used for sign extension. 
*/ -- struct -- { -- int u : 4; -- int v : 4; -- int w : 4; -- } aoffimmi; -- -- if (modifier & ~recognized_bits) -- FIXME("Unhandled instruction modifier %#x.\n", modifier); -- -- aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT; -- aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT; -- aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT; -- ins->texel_offset.u = aoffimmi.u; -- ins->texel_offset.v = aoffimmi.v; -- ins->texel_offset.w = aoffimmi.w; -- break; -- } -- -- case VKD3D_SM5_MODIFIER_DATA_TYPE: -- { -- DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT; -- unsigned int i; -- -- for (i = 0; i < VKD3D_VEC4_SIZE; i++) -- { -- enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); -- -- if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) -- { -- FIXME("Unhandled data type %#x.\n", data_type); -- ins->resource_data_type[i] = VKD3D_DATA_FLOAT; -- } -- else -- { -- ins->resource_data_type[i] = data_type_table[data_type]; -- } -- } -- break; -- } -- -- case VKD3D_SM5_MODIFIER_RESOURCE_TYPE: -- { -- enum vkd3d_sm4_resource_type resource_type -- = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT; -- -- if (resource_type == VKD3D_SM4_RESOURCE_RAW_BUFFER) -- ins->raw = true; -- else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER) -- ins->structured = true; -- -- if (resource_type < ARRAY_SIZE(resource_type_table)) -- ins->resource_type = resource_type_table[resource_type]; -- else -- { -- FIXME("Unhandled resource type %#x.\n", resource_type); -- ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; -- } -- -- ins->resource_stride -- = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT; -- break; -- } -+ size_t size_position, offsets_position, checksum_position, i; -+ struct vkd3d_bytecode_buffer 
buffer = {0}; -+ uint32_t checksum[4]; - -- default: -- FIXME("Unhandled instruction modifier %#x.\n", modifier); -- } --} -+ TRACE("section_count %zu, sections %p, dxbc %p, messages %p.\n", section_count, sections, dxbc, messages); - --static void shader_sm4_read_instruction(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *ins) --{ -- struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); -- const struct vkd3d_sm4_opcode_info *opcode_info; -- uint32_t opcode_token, opcode, previous_token; -- struct vkd3d_shader_dst_param *dst_params; -- struct vkd3d_shader_src_param *src_params; -- const uint32_t **ptr = &parser->ptr; -- unsigned int i, len; -- size_t remaining; -- const uint32_t *p; -- DWORD precise; -+ if (messages) -+ *messages = NULL; - -- if (*ptr >= sm4->end) -- { -- WARN("End of byte-code, failed to read opcode.\n"); -- goto fail; -- } -- remaining = sm4->end - *ptr; -+ put_u32(&buffer, TAG_DXBC); - -- ++parser->location.line; -+ checksum_position = bytecode_get_size(&buffer); -+ for (i = 0; i < 4; ++i) -+ put_u32(&buffer, 0); - -- opcode_token = *(*ptr)++; -- opcode = opcode_token & VKD3D_SM4_OPCODE_MASK; -+ put_u32(&buffer, 1); /* version */ -+ size_position = put_u32(&buffer, 0); -+ put_u32(&buffer, section_count); - -- len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); -- if (!len) -- { -- if (remaining < 2) -- { -- WARN("End of byte-code, failed to read length token.\n"); -- goto fail; -- } -- len = **ptr; -- } -- if (!len || remaining < len) -- { -- WARN("Read invalid length %u (remaining %zu).\n", len, remaining); -- goto fail; -- } -- --len; -+ offsets_position = bytecode_get_size(&buffer); -+ for (i = 0; i < section_count; ++i) -+ put_u32(&buffer, 0); - -- if (!(opcode_info = get_opcode_info(opcode))) -+ for (i = 0; i < section_count; ++i) - { -- FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); -- ins->handler_idx = VKD3DSIH_INVALID; -- 
*ptr += len; -- return; -- } -- -- ins->handler_idx = opcode_info->handler_idx; -- ins->flags = 0; -- ins->coissue = false; -- ins->raw = false; -- ins->structured = false; -- ins->predicate = NULL; -- ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); -- ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); -- ins->src = src_params = shader_parser_get_src_params(parser, ins->src_count); -- if (!src_params && ins->src_count) -- { -- ERR("Failed to allocate src parameters.\n"); -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- ins->handler_idx = VKD3DSIH_INVALID; -- return; -+ set_u32(&buffer, offsets_position + i * sizeof(uint32_t), bytecode_align(&buffer)); -+ put_u32(&buffer, sections[i].tag); -+ put_u32(&buffer, sections[i].data.size); -+ bytecode_put_bytes(&buffer, sections[i].data.code, sections[i].data.size); - } -- ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; -- ins->resource_stride = 0; -- ins->resource_data_type[0] = VKD3D_DATA_FLOAT; -- ins->resource_data_type[1] = VKD3D_DATA_FLOAT; -- ins->resource_data_type[2] = VKD3D_DATA_FLOAT; -- ins->resource_data_type[3] = VKD3D_DATA_FLOAT; -- memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); -+ set_u32(&buffer, size_position, bytecode_get_size(&buffer)); - -- p = *ptr; -- *ptr += len; -+ vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); -+ for (i = 0; i < 4; ++i) -+ set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); - -- if (opcode_info->read_opcode_func) -- { -- ins->dst = NULL; -- ins->dst_count = 0; -- opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4); -- } -- else -+ if (!buffer.status) - { -- enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE; -- -- previous_token = opcode_token; -- while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr) -- shader_sm4_read_instruction_modifier(previous_token = *p++, ins); -- -- ins->flags = 
(opcode_token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -- if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) -- { -- ins->flags &= ~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE; -- instruction_dst_modifier = VKD3DSPDM_SATURATE; -- } -- precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT; -- ins->flags |= precise << VKD3DSI_PRECISE_SHIFT; -- -- ins->dst = dst_params = shader_parser_get_dst_params(parser, ins->dst_count); -- if (!dst_params && ins->dst_count) -- { -- ERR("Failed to allocate dst parameters.\n"); -- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- ins->handler_idx = VKD3DSIH_INVALID; -- return; -- } -- for (i = 0; i < ins->dst_count; ++i) -- { -- if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), -- &dst_params[i]))) -- { -- ins->handler_idx = VKD3DSIH_INVALID; -- return; -- } -- dst_params[i].modifiers |= instruction_dst_modifier; -- } -- -- for (i = 0; i < ins->src_count; ++i) -- { -- if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), -- &src_params[i]))) -- { -- ins->handler_idx = VKD3DSIH_INVALID; -- return; -- } -- } -+ dxbc->code = buffer.data; -+ dxbc->size = buffer.size; - } -- -- return; -- --fail: -- *ptr = sm4->end; -- ins->handler_idx = VKD3DSIH_INVALID; -- return; --} -- --static bool shader_sm4_is_end(struct vkd3d_shader_parser *parser) --{ -- struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); -- -- return parser->ptr == sm4->end; -+ return buffer.status; - } - --static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = --{ -- .parser_destroy = shader_sm4_destroy, --}; -- --static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, -- size_t byte_code_size, const char *source_name, const struct vkd3d_shader_signature *output_signature, -- struct vkd3d_shader_message_context *message_context) -+int 
dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) - { -- struct vkd3d_shader_version version; -- uint32_t version_token, token_count; -- unsigned int i; -- -- if (byte_code_size / sizeof(*byte_code) < 2) -- { -- WARN("Invalid byte code size %lu.\n", (long)byte_code_size); -- return false; -- } -- -- version_token = byte_code[0]; -- TRACE("Version: 0x%08x.\n", version_token); -- token_count = byte_code[1]; -- TRACE("Token count: %u.\n", token_count); -- -- if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count) -- { -- WARN("Invalid token count %u.\n", token_count); -- return false; -- } -- -- sm4->start = &byte_code[2]; -- sm4->end = &byte_code[token_count]; -- -- switch (version_token >> 16) -- { -- case VKD3D_SM4_PS: -- version.type = VKD3D_SHADER_TYPE_PIXEL; -- break; -- -- case VKD3D_SM4_VS: -- version.type = VKD3D_SHADER_TYPE_VERTEX; -- break; -- -- case VKD3D_SM4_GS: -- version.type = VKD3D_SHADER_TYPE_GEOMETRY; -- break; -- -- case VKD3D_SM5_HS: -- version.type = VKD3D_SHADER_TYPE_HULL; -- break; -- -- case VKD3D_SM5_DS: -- version.type = VKD3D_SHADER_TYPE_DOMAIN; -- break; -- -- case VKD3D_SM5_CS: -- version.type = VKD3D_SHADER_TYPE_COMPUTE; -- break; -- -- default: -- FIXME("Unrecognised shader type %#x.\n", version_token >> 16); -- } -- version.major = VKD3D_SM4_VERSION_MAJOR(version_token); -- version.minor = VKD3D_SM4_VERSION_MINOR(version_token); -- -- /* Estimate instruction count to avoid reallocation in most shaders. 
*/ -- if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, -- token_count / 7u + 20)) -- return false; -- sm4->p.ptr = sm4->start; -- -- memset(sm4->output_map, 0xff, sizeof(sm4->output_map)); -- for (i = 0; i < output_signature->element_count; ++i) -- { -- struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; -- -- if (version.type == VKD3D_SHADER_TYPE_PIXEL -- && ascii_strcasecmp(e->semantic_name, "SV_Target")) -- continue; -- if (e->register_index >= ARRAY_SIZE(sm4->output_map)) -- { -- WARN("Invalid output index %u.\n", e->register_index); -- continue; -- } -- -- sm4->output_map[e->register_index] = e->semantic_index; -- } -- -- return true; -+ return vkd3d_shader_serialize_dxbc(dxbc->section_count, dxbc->sections, out, NULL); - } - - static bool require_space(size_t offset, size_t count, size_t size, size_t data_size) -@@ -1927,13 +321,33 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, - return ret; - } - -+/* Shader Model 6 shaders use these special values in the output signature, -+ * but Shader Model 4/5 just use VKD3D_SHADER_SV_NONE. Normalize to SM6. 
*/ -+static enum vkd3d_shader_sysval_semantic map_fragment_output_sysval(const char *name) -+{ -+ if (!ascii_strcasecmp(name, "sv_target")) -+ return VKD3D_SHADER_SV_TARGET; -+ if (!ascii_strcasecmp(name, "sv_depth")) -+ return VKD3D_SHADER_SV_DEPTH; -+ if (!ascii_strcasecmp(name, "sv_coverage")) -+ return VKD3D_SHADER_SV_COVERAGE; -+ if (!ascii_strcasecmp(name, "sv_depthgreaterequal")) -+ return VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL; -+ if (!ascii_strcasecmp(name, "sv_depthlessequal")) -+ return VKD3D_SHADER_SV_DEPTH_LESS_EQUAL; -+ if (!ascii_strcasecmp(name, "sv_stencilref")) -+ return VKD3D_SHADER_SV_STENCIL_REF; -+ -+ return VKD3D_SHADER_SV_NONE; -+} -+ - static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *section, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *s) -+ struct vkd3d_shader_message_context *message_context, struct shader_signature *s) - { - bool has_stream_index, has_min_precision; -- struct vkd3d_shader_signature_element *e; - const char *data = section->data.code; - uint32_t count, header_size; -+ struct signature_element *e; - const char *ptr = data; - unsigned int i; - -@@ -1979,6 +393,8 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s - { - uint32_t name_offset, mask; - -+ e[i].sort_index = i; -+ - if (has_stream_index) - read_dword(&ptr, &e[i].stream_index); - else -@@ -1995,6 +411,8 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s - read_dword(&ptr, &e[i].sysval_semantic); - read_dword(&ptr, &e[i].component_type); - read_dword(&ptr, &e[i].register_index); -+ e[i].target_location = e[i].register_index; -+ e[i].register_count = 1; - read_dword(&ptr, &mask); - e[i].mask = mask & 0xff; - e[i].used_mask = (mask >> 8) & 0xff; -@@ -2003,6 +421,9 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s - case TAG_OSGN: - case TAG_OSG1: - case TAG_OSG5: -+ if (e[i].sysval_semantic == 
VKD3D_SHADER_SV_NONE) -+ e[i].sysval_semantic = map_fragment_output_sysval(e[i].semantic_name); -+ /* Fall through. */ - case TAG_PCSG: - case TAG_PSG1: - e[i].used_mask = e[i].mask & ~e[i].used_mask; -@@ -2029,7 +450,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s - static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, - struct vkd3d_shader_message_context *message_context, void *ctx) - { -- struct vkd3d_shader_signature *is = ctx; -+ struct shader_signature *is = ctx; - - if (section->tag != TAG_ISGN) - return VKD3D_OK; -@@ -2037,13 +458,13 @@ static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, - if (is->elements) - { - FIXME("Multiple input signatures.\n"); -- vkd3d_shader_free_shader_signature(is); -+ shader_signature_cleanup(is); - } - return shader_parse_signature(section, message_context, is); - } - - int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature) -+ struct vkd3d_shader_message_context *message_context, struct shader_signature *signature) - { - int ret; - -@@ -2096,8 +517,14 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, - return ret; - break; - -+ case TAG_DXIL: - case TAG_SHDR: - case TAG_SHEX: -+ if ((section->tag == TAG_DXIL) != desc->is_dxil) -+ { -+ TRACE("Skipping chunk %#x.\n", section->tag); -+ break; -+ } - if (desc->byte_code) - FIXME("Multiple shader code chunks.\n"); - desc->byte_code = section->data.code; -@@ -2108,10 +535,6 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, - TRACE("Skipping AON9 shader code chunk.\n"); - break; - -- case TAG_DXIL: -- FIXME("Skipping DXIL shader model 6+ code chunk.\n"); -- break; -- - default: - TRACE("Skipping chunk %#x.\n", section->tag); - break; -@@ -2122,22 +545,16 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc 
*section, - - void free_shader_desc(struct vkd3d_shader_desc *desc) - { -- vkd3d_shader_free_shader_signature(&desc->input_signature); -- vkd3d_shader_free_shader_signature(&desc->output_signature); -- vkd3d_shader_free_shader_signature(&desc->patch_constant_signature); -+ shader_signature_cleanup(&desc->input_signature); -+ shader_signature_cleanup(&desc->output_signature); -+ shader_signature_cleanup(&desc->patch_constant_signature); - } - --static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, -+int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) - { - int ret; - -- desc->byte_code = NULL; -- desc->byte_code_size = 0; -- memset(&desc->input_signature, 0, sizeof(desc->input_signature)); -- memset(&desc->output_signature, 0, sizeof(desc->output_signature)); -- memset(&desc->patch_constant_signature, 0, sizeof(desc->patch_constant_signature)); -- - ret = for_each_dxbc_section(dxbc, message_context, source_name, shdr_handler, desc); - if (!desc->byte_code) - ret = VKD3D_ERROR_INVALID_ARGUMENT; -@@ -2151,66 +568,6 @@ static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, - return ret; - } - --int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) --{ -- struct vkd3d_shader_instruction_array *instructions; -- struct vkd3d_shader_desc *shader_desc; -- struct vkd3d_shader_instruction *ins; -- struct vkd3d_shader_sm4_parser *sm4; -- int ret; -- -- if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) -- { -- ERR("Failed to allocate parser.\n"); -- return VKD3D_ERROR_OUT_OF_MEMORY; -- } -- -- shader_desc = &sm4->p.shader_desc; -- if ((ret = shader_extract_from_dxbc(&compile_info->source, -- message_context, compile_info->source_name, shader_desc)) < 0) -- { -- WARN("Failed to extract shader, 
vkd3d result %d.\n", ret); -- vkd3d_free(sm4); -- return ret; -- } -- -- if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, -- compile_info->source_name, &shader_desc->output_signature, message_context)) -- { -- WARN("Failed to initialise shader parser.\n"); -- free_shader_desc(shader_desc); -- vkd3d_free(sm4); -- return VKD3D_ERROR_INVALID_ARGUMENT; -- } -- -- instructions = &sm4->p.instructions; -- while (!shader_sm4_is_end(&sm4->p)) -- { -- if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) -- { -- ERR("Failed to allocate instructions.\n"); -- vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- shader_sm4_destroy(&sm4->p); -- return VKD3D_ERROR_OUT_OF_MEMORY; -- } -- ins = &instructions->elements[instructions->count]; -- shader_sm4_read_instruction(&sm4->p, ins); -- -- if (ins->handler_idx == VKD3DSIH_INVALID) -- { -- WARN("Encountered unrecognized or invalid instruction.\n"); -- shader_sm4_destroy(&sm4->p); -- return VKD3D_ERROR_OUT_OF_MEMORY; -- } -- ++instructions->count; -- } -- -- *parser = &sm4->p; -- -- return VKD3D_OK; --} -- - /* root signatures */ - #define VKD3D_ROOT_SIGNATURE_1_0_ROOT_DESCRIPTOR_FLAGS VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE - -@@ -2862,7 +1219,7 @@ static int shader_write_root_parameters(struct root_signature_writer_context *co - size_t parameters_position; - unsigned int i; - -- parameters_position = bytecode_get_size(buffer); -+ parameters_position = bytecode_align(buffer); - for (i = 0; i < parameter_count; ++i) - { - put_u32(buffer, versioned_root_signature_get_parameter_type(desc, i)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -new file mode 100644 -index 00000000000..b78c78d34a7 ---- /dev/null -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -0,0 +1,2968 @@ -+/* -+ * Copyright 2023 Conor McCarthy for CodeWeavers -+ * -+ * This library is free software; you can redistribute it 
and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#include "vkd3d_shader_private.h" -+ -+#define VKD3D_SM6_VERSION_MAJOR(version) (((version) >> 4) & 0xf) -+#define VKD3D_SM6_VERSION_MINOR(version) (((version) >> 0) & 0xf) -+ -+#define BITCODE_MAGIC VKD3D_MAKE_TAG('B', 'C', 0xc0, 0xde) -+#define DXIL_OP_MAX_OPERANDS 17 -+ -+enum bitcode_block_id -+{ -+ BLOCKINFO_BLOCK = 0, -+ MODULE_BLOCK = 8, -+ PARAMATTR_BLOCK = 9, -+ PARAMATTR_GROUP_BLOCK = 10, -+ CONSTANTS_BLOCK = 11, -+ FUNCTION_BLOCK = 12, -+ VALUE_SYMTAB_BLOCK = 14, -+ METADATA_BLOCK = 15, -+ METADATA_ATTACHMENT_BLOCK = 16, -+ TYPE_BLOCK = 17, -+ USELIST_BLOCK = 18, -+}; -+ -+enum bitcode_blockinfo_code -+{ -+ SETBID = 1, -+ BLOCKNAME = 2, -+ SETRECORDNAME = 3, -+}; -+ -+enum bitcode_block_abbreviation -+{ -+ END_BLOCK = 0, -+ ENTER_SUBBLOCK = 1, -+ DEFINE_ABBREV = 2, -+ UNABBREV_RECORD = 3, -+}; -+ -+enum bitcode_abbrev_type -+{ -+ ABBREV_FIXED = 1, -+ ABBREV_VBR = 2, -+ ABBREV_ARRAY = 3, -+ ABBREV_CHAR = 4, -+ ABBREV_BLOB = 5, -+}; -+ -+enum bitcode_address_space -+{ -+ ADDRESS_SPACE_DEFAULT, -+ ADDRESS_SPACE_DEVICEMEM, -+ ADDRESS_SPACE_CBUFFER, -+ ADDRESS_SPACE_GROUPSHARED, -+}; -+ -+enum bitcode_module_code -+{ -+ MODULE_CODE_VERSION = 1, -+ MODULE_CODE_GLOBALVAR = 7, -+ MODULE_CODE_FUNCTION = 8, -+}; -+ -+enum bitcode_constant_code -+{ -+ 
CST_CODE_SETTYPE = 1, -+ CST_CODE_NULL = 2, -+ CST_CODE_UNDEF = 3, -+ CST_CODE_INTEGER = 4, -+ CST_CODE_FLOAT = 6, -+ CST_CODE_STRING = 8, -+ CST_CODE_CE_GEP = 12, -+ CST_CODE_CE_INBOUNDS_GEP = 20, -+ CST_CODE_DATA = 22, -+}; -+ -+enum bitcode_function_code -+{ -+ FUNC_CODE_DECLAREBLOCKS = 1, -+ FUNC_CODE_INST_BINOP = 2, -+ FUNC_CODE_INST_CAST = 3, -+ FUNC_CODE_INST_RET = 10, -+ FUNC_CODE_INST_BR = 11, -+ FUNC_CODE_INST_SWITCH = 12, -+ FUNC_CODE_INST_PHI = 16, -+ FUNC_CODE_INST_ALLOCA = 19, -+ FUNC_CODE_INST_LOAD = 20, -+ FUNC_CODE_INST_EXTRACTVAL = 26, -+ FUNC_CODE_INST_CMP2 = 28, -+ FUNC_CODE_INST_VSELECT = 29, -+ FUNC_CODE_INST_CALL = 34, -+ FUNC_CODE_INST_ATOMICRMW = 38, -+ FUNC_CODE_INST_LOADATOMIC = 41, -+ FUNC_CODE_INST_GEP = 43, -+ FUNC_CODE_INST_STORE = 44, -+ FUNC_CODE_INST_STOREATOMIC = 45, -+ FUNC_CODE_INST_CMPXCHG = 46, -+}; -+ -+enum bitcode_type_code -+{ -+ TYPE_CODE_NUMENTRY = 1, -+ TYPE_CODE_VOID = 2, -+ TYPE_CODE_FLOAT = 3, -+ TYPE_CODE_DOUBLE = 4, -+ TYPE_CODE_LABEL = 5, -+ TYPE_CODE_INTEGER = 7, -+ TYPE_CODE_POINTER = 8, -+ TYPE_CODE_HALF = 10, -+ TYPE_CODE_ARRAY = 11, -+ TYPE_CODE_VECTOR = 12, -+ TYPE_CODE_METADATA = 16, -+ TYPE_CODE_STRUCT_ANON = 18, -+ TYPE_CODE_STRUCT_NAME = 19, -+ TYPE_CODE_STRUCT_NAMED = 20, -+ TYPE_CODE_FUNCTION = 21, -+}; -+ -+enum bitcode_value_symtab_code -+{ -+ VST_CODE_ENTRY = 1, -+ VST_CODE_BBENTRY = 2, -+}; -+ -+enum dx_intrinsic_opcode -+{ -+ DX_STORE_OUTPUT = 5, -+}; -+ -+struct sm6_pointer_info -+{ -+ const struct sm6_type *type; -+ enum bitcode_address_space addr_space; -+}; -+ -+struct sm6_struct_info -+{ -+ const char *name; -+ unsigned int elem_count; -+ const struct sm6_type *elem_types[]; -+}; -+ -+struct sm6_function_info -+{ -+ const struct sm6_type *ret_type; -+ unsigned int param_count; -+ const struct sm6_type *param_types[]; -+}; -+ -+struct sm6_array_info -+{ -+ unsigned int count; -+ const struct sm6_type *elem_type; -+}; -+ -+enum sm6_type_class -+{ -+ TYPE_CLASS_VOID, -+ TYPE_CLASS_INTEGER, -+ 
TYPE_CLASS_FLOAT, -+ TYPE_CLASS_POINTER, -+ TYPE_CLASS_STRUCT, -+ TYPE_CLASS_FUNCTION, -+ TYPE_CLASS_VECTOR, -+ TYPE_CLASS_ARRAY, -+ TYPE_CLASS_LABEL, -+ TYPE_CLASS_METADATA, -+}; -+ -+struct sm6_type -+{ -+ enum sm6_type_class class; -+ union -+ { -+ unsigned int width; -+ struct sm6_pointer_info pointer; -+ struct sm6_struct_info *struc; -+ struct sm6_function_info *function; -+ struct sm6_array_info array; -+ } u; -+}; -+ -+enum sm6_value_type -+{ -+ VALUE_TYPE_FUNCTION, -+ VALUE_TYPE_REG, -+}; -+ -+struct sm6_function_data -+{ -+ const char *name; -+ bool is_prototype; -+ unsigned int attribs_id; -+}; -+ -+struct sm6_value -+{ -+ const struct sm6_type *type; -+ enum sm6_value_type value_type; -+ bool is_undefined; -+ union -+ { -+ struct sm6_function_data function; -+ struct vkd3d_shader_register reg; -+ } u; -+}; -+ -+struct dxil_record -+{ -+ unsigned int code; -+ unsigned int operand_count; -+ uint64_t operands[]; -+}; -+ -+struct sm6_symbol -+{ -+ unsigned int id; -+ const char *name; -+}; -+ -+struct sm6_block -+{ -+ struct vkd3d_shader_instruction *instructions; -+ size_t instruction_capacity; -+ size_t instruction_count; -+}; -+ -+struct sm6_function -+{ -+ const struct sm6_value *declaration; -+ -+ struct sm6_block *blocks[1]; -+ size_t block_count; -+ -+ size_t value_count; -+}; -+ -+struct dxil_block -+{ -+ const struct dxil_block *parent; -+ enum bitcode_block_id id; -+ unsigned int abbrev_len; -+ unsigned int start; -+ unsigned int length; -+ unsigned int level; -+ -+ /* The abbrev, block and record structs are not relocatable. 
*/ -+ struct dxil_abbrev **abbrevs; -+ size_t abbrev_capacity; -+ size_t abbrev_count; -+ unsigned int blockinfo_bid; -+ bool has_bid; -+ -+ struct dxil_block **child_blocks; -+ size_t child_block_capacity; -+ size_t child_block_count; -+ -+ struct dxil_record **records; -+ size_t record_capacity; -+ size_t record_count; -+}; -+ -+struct sm6_parser -+{ -+ const uint32_t *ptr, *start, *end; -+ unsigned int bitpos; -+ -+ struct dxil_block root_block; -+ struct dxil_block *current_block; -+ -+ struct dxil_global_abbrev **abbrevs; -+ size_t abbrev_capacity; -+ size_t abbrev_count; -+ -+ struct sm6_type *types; -+ size_t type_count; -+ -+ struct sm6_symbol *global_symbols; -+ size_t global_symbol_count; -+ -+ struct vkd3d_shader_dst_param *output_params; -+ -+ struct sm6_function *functions; -+ size_t function_count; -+ -+ struct sm6_value *values; -+ size_t value_count; -+ size_t value_capacity; -+ size_t cur_max_value; -+ -+ struct vkd3d_shader_parser p; -+}; -+ -+struct dxil_abbrev_operand -+{ -+ uint64_t context; -+ bool (*read_operand)(struct sm6_parser *sm6, uint64_t context, uint64_t *operand); -+}; -+ -+struct dxil_abbrev -+{ -+ unsigned int count; -+ bool is_array; -+ struct dxil_abbrev_operand operands[]; -+}; -+ -+struct dxil_global_abbrev -+{ -+ unsigned int block_id; -+ struct dxil_abbrev abbrev; -+}; -+ -+static const uint64_t CALL_CONV_FLAG_EXPLICIT_TYPE = 1ull << 15; -+ -+static size_t size_add_with_overflow_check(size_t a, size_t b) -+{ -+ size_t i = a + b; -+ return (i < a) ? 
SIZE_MAX : i; -+} -+ -+static struct sm6_parser *sm6_parser(struct vkd3d_shader_parser *parser) -+{ -+ return CONTAINING_RECORD(parser, struct sm6_parser, p); -+} -+ -+static bool sm6_parser_is_end(struct sm6_parser *sm6) -+{ -+ return sm6->ptr == sm6->end; -+} -+ -+static uint32_t sm6_parser_read_uint32(struct sm6_parser *sm6) -+{ -+ if (sm6_parser_is_end(sm6)) -+ { -+ sm6->p.failed = true; -+ return 0; -+ } -+ return *sm6->ptr++; -+} -+ -+static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length) -+{ -+ unsigned int l, prev_len = 0; -+ uint32_t bits; -+ -+ if (!length) -+ return 0; -+ -+ assert(length < 32); -+ -+ if (sm6_parser_is_end(sm6)) -+ { -+ sm6->p.failed = true; -+ return 0; -+ } -+ -+ assert(sm6->bitpos < 32); -+ bits = *sm6->ptr >> sm6->bitpos; -+ l = 32 - sm6->bitpos; -+ if (l <= length) -+ { -+ ++sm6->ptr; -+ if (sm6_parser_is_end(sm6) && l < length) -+ { -+ sm6->p.failed = true; -+ return bits; -+ } -+ sm6->bitpos = 0; -+ bits |= *sm6->ptr << l; -+ prev_len = l; -+ } -+ sm6->bitpos += length - prev_len; -+ -+ return bits & ((1 << length) - 1); -+} -+ -+static uint64_t sm6_parser_read_vbr(struct sm6_parser *sm6, unsigned int length) -+{ -+ unsigned int bits, flag, mask, shift = 0; -+ uint64_t result = 0; -+ -+ if (!length) -+ return 0; -+ -+ if (sm6_parser_is_end(sm6)) -+ { -+ sm6->p.failed = true; -+ return 0; -+ } -+ -+ flag = 1 << (length - 1); -+ mask = flag - 1; -+ do -+ { -+ bits = sm6_parser_read_bits(sm6, length); -+ result |= (uint64_t)(bits & mask) << shift; -+ shift += length - 1; -+ } while ((bits & flag) && !sm6->p.failed && shift < 64); -+ -+ sm6->p.failed |= !!(bits & flag); -+ -+ return result; -+} -+ -+static void sm6_parser_align_32(struct sm6_parser *sm6) -+{ -+ if (!sm6->bitpos) -+ return; -+ -+ if (sm6_parser_is_end(sm6)) -+ { -+ sm6->p.failed = true; -+ return; -+ } -+ -+ ++sm6->ptr; -+ sm6->bitpos = 0; -+} -+ -+static bool dxil_block_handle_blockinfo_record(struct dxil_block *block, struct dxil_record 
*record) -+{ -+ /* BLOCKINFO blocks must only occur immediately below the module root block. */ -+ if (block->level > 1) -+ { -+ WARN("Invalid blockinfo block level %u.\n", block->level); -+ return false; -+ } -+ -+ switch (record->code) -+ { -+ case SETBID: -+ if (!record->operand_count) -+ { -+ WARN("Missing id operand.\n"); -+ return false; -+ } -+ if (record->operands[0] > UINT_MAX) -+ WARN("Truncating block id %"PRIu64".\n", record->operands[0]); -+ block->blockinfo_bid = record->operands[0]; -+ block->has_bid = true; -+ break; -+ case BLOCKNAME: -+ case SETRECORDNAME: -+ break; -+ default: -+ FIXME("Unhandled BLOCKINFO record type %u.\n", record->code); -+ break; -+ } -+ -+ return true; -+} -+ -+static enum vkd3d_result dxil_block_add_record(struct dxil_block *block, struct dxil_record *record) -+{ -+ unsigned int reserve; -+ -+ switch (block->id) -+ { -+ /* Rough initial reserve sizes for small shaders. */ -+ case CONSTANTS_BLOCK: reserve = 32; break; -+ case FUNCTION_BLOCK: reserve = 128; break; -+ case METADATA_BLOCK: reserve = 32; break; -+ case TYPE_BLOCK: reserve = 32; break; -+ default: reserve = 8; break; -+ } -+ reserve = max(reserve, block->record_count + 1); -+ if (!vkd3d_array_reserve((void **)&block->records, &block->record_capacity, reserve, sizeof(*block->records))) -+ { -+ ERR("Failed to allocate %u records.\n", reserve); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if (block->id == BLOCKINFO_BLOCK && !dxil_block_handle_blockinfo_record(block, record)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ block->records[block->record_count++] = record; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_read_unabbrev_record(struct sm6_parser *sm6) -+{ -+ struct dxil_block *block = sm6->current_block; -+ enum vkd3d_result ret = VKD3D_OK; -+ unsigned int code, count, i; -+ struct dxil_record *record; -+ -+ code = sm6_parser_read_vbr(sm6, 6); -+ -+ count = sm6_parser_read_vbr(sm6, 6); -+ if (!(record = vkd3d_malloc(sizeof(*record) + 
count * sizeof(record->operands[0])))) -+ { -+ ERR("Failed to allocate record with %u operands.\n", count); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ record->code = code; -+ record->operand_count = count; -+ -+ for (i = 0; i < count; ++i) -+ record->operands[i] = sm6_parser_read_vbr(sm6, 6); -+ if (sm6->p.failed) -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ -+ if (ret < 0 || (ret = dxil_block_add_record(block, record)) < 0) -+ vkd3d_free(record); -+ -+ return ret; -+} -+ -+static bool sm6_parser_read_literal_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ *op = context; -+ return !sm6->p.failed; -+} -+ -+static bool sm6_parser_read_fixed_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ *op = sm6_parser_read_bits(sm6, context); -+ return !sm6->p.failed; -+} -+ -+static bool sm6_parser_read_vbr_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ *op = sm6_parser_read_vbr(sm6, context); -+ return !sm6->p.failed; -+} -+ -+static bool sm6_parser_read_char6_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ *op = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"[sm6_parser_read_bits(sm6, 6)]; -+ return !sm6->p.failed; -+} -+ -+static bool sm6_parser_read_blob_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) -+{ -+ int count = sm6_parser_read_vbr(sm6, 6); -+ sm6_parser_align_32(sm6); -+ for (; count > 0; count -= 4) -+ sm6_parser_read_uint32(sm6); -+ FIXME("Unhandled blob operand.\n"); -+ return false; -+} -+ -+static enum vkd3d_result dxil_abbrev_init(struct dxil_abbrev *abbrev, unsigned int count, struct sm6_parser *sm6) -+{ -+ enum bitcode_abbrev_type prev_type, type; -+ unsigned int i; -+ -+ abbrev->is_array = false; -+ -+ for (i = 0, prev_type = 0; i < count && !sm6->p.failed; ++i) -+ { -+ if (sm6_parser_read_bits(sm6, 1)) -+ { -+ if (prev_type == ABBREV_ARRAY) -+ { -+ WARN("Unexpected literal abbreviation after array.\n"); -+ return 
VKD3D_ERROR_INVALID_SHADER; -+ } -+ abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 8); -+ abbrev->operands[i].read_operand = sm6_parser_read_literal_operand; -+ continue; -+ } -+ -+ switch (type = sm6_parser_read_bits(sm6, 3)) -+ { -+ case ABBREV_FIXED: -+ case ABBREV_VBR: -+ abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 5); -+ abbrev->operands[i].read_operand = (type == ABBREV_FIXED) ? sm6_parser_read_fixed_operand -+ : sm6_parser_read_vbr_operand; -+ break; -+ -+ case ABBREV_ARRAY: -+ if (prev_type == ABBREV_ARRAY || i != count - 2) -+ { -+ WARN("Unexpected array abbreviation.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ abbrev->is_array = true; -+ --i; -+ --count; -+ break; -+ -+ case ABBREV_CHAR: -+ abbrev->operands[i].read_operand = sm6_parser_read_char6_operand; -+ break; -+ -+ case ABBREV_BLOB: -+ if (prev_type == ABBREV_ARRAY || i != count - 1) -+ { -+ WARN("Unexpected blob abbreviation.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ abbrev->operands[i].read_operand = sm6_parser_read_blob_operand; -+ break; -+ } -+ -+ prev_type = type; -+ } -+ -+ abbrev->count = count; -+ -+ return sm6->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6) -+{ -+ struct dxil_block *block = sm6->current_block; -+ unsigned int count = sm6_parser_read_vbr(sm6, 5); -+ struct dxil_global_abbrev *global_abbrev; -+ enum vkd3d_result ret; -+ -+ assert(block->id == BLOCKINFO_BLOCK); -+ -+ if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs)) -+ || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0])))) -+ { -+ ERR("Failed to allocate global abbreviation.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if ((ret = dxil_abbrev_init(&global_abbrev->abbrev, count, sm6)) < 0) -+ { -+ vkd3d_free(global_abbrev); -+ return ret; -+ } -+ -+ if (!block->has_bid) -+ { -+ WARN("Missing blockinfo block id.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (block->blockinfo_bid == MODULE_BLOCK) -+ { -+ FIXME("Unhandled global abbreviation for module block.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ global_abbrev->block_id = block->blockinfo_bid; -+ -+ sm6->abbrevs[sm6->abbrev_count++] = global_abbrev; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_add_block_abbrev(struct sm6_parser *sm6) -+{ -+ struct dxil_block *block = sm6->current_block; -+ struct dxil_abbrev *abbrev; -+ enum vkd3d_result ret; -+ unsigned int count; -+ -+ if (block->id == BLOCKINFO_BLOCK) -+ return sm6_parser_add_global_abbrev(sm6); -+ -+ count = sm6_parser_read_vbr(sm6, 5); -+ if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, block->abbrev_count + 1, sizeof(*block->abbrevs)) -+ || !(abbrev = vkd3d_malloc(sizeof(*abbrev) + count * sizeof(abbrev->operands[0])))) -+ { -+ ERR("Failed to allocate block abbreviation.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if ((ret = dxil_abbrev_init(abbrev, count, sm6)) < 0) -+ { -+ vkd3d_free(abbrev); -+ return ret; -+ } -+ -+ 
block->abbrevs[block->abbrev_count++] = abbrev; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result sm6_parser_read_abbrev_record(struct sm6_parser *sm6, unsigned int abbrev_id) -+{ -+ enum vkd3d_result ret = VKD3D_ERROR_INVALID_SHADER; -+ struct dxil_block *block = sm6->current_block; -+ struct dxil_record *temp, *record; -+ unsigned int i, count, array_len; -+ struct dxil_abbrev *abbrev; -+ uint64_t code; -+ -+ if (abbrev_id >= block->abbrev_count) -+ { -+ WARN("Invalid abbreviation id %u.\n", abbrev_id); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ abbrev = block->abbrevs[abbrev_id]; -+ if (!(count = abbrev->count)) -+ return VKD3D_OK; -+ if (count == 1 && abbrev->is_array) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ /* First operand is the record code. The array is included in the count, but will be done separately. */ -+ count -= abbrev->is_array + 1; -+ if (!(record = vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) -+ { -+ ERR("Failed to allocate record with %u operands.\n", count); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if (!abbrev->operands[0].read_operand(sm6, abbrev->operands[0].context, &code)) -+ goto fail; -+ if (code > UINT_MAX) -+ FIXME("Truncating 64-bit record code %#"PRIx64".\n", code); -+ record->code = code; -+ -+ for (i = 0; i < count; ++i) -+ if (!abbrev->operands[i + 1].read_operand(sm6, abbrev->operands[i + 1].context, &record->operands[i])) -+ goto fail; -+ record->operand_count = count; -+ -+ /* An array can occur only as the last operand. 
*/ -+ if (abbrev->is_array) -+ { -+ array_len = sm6_parser_read_vbr(sm6, 6); -+ if (!(temp = vkd3d_realloc(record, sizeof(*record) + (count + array_len) * sizeof(record->operands[0])))) -+ { -+ ERR("Failed to allocate record with %u operands.\n", count + array_len); -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ goto fail; -+ } -+ record = temp; -+ -+ for (i = 0; i < array_len; ++i) -+ { -+ if (!abbrev->operands[count + 1].read_operand(sm6, abbrev->operands[count + 1].context, -+ &record->operands[count + i])) -+ { -+ goto fail; -+ } -+ } -+ record->operand_count += array_len; -+ } -+ -+ if ((ret = dxil_block_add_record(block, record)) < 0) -+ goto fail; -+ -+ return VKD3D_OK; -+ -+fail: -+ vkd3d_free(record); -+ return ret; -+} -+ -+static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, -+ struct sm6_parser *sm6); -+ -+static enum vkd3d_result dxil_block_read(struct dxil_block *parent, struct sm6_parser *sm6) -+{ -+ unsigned int reserve = (parent->id == MODULE_BLOCK) ? 
12 : 2; -+ struct dxil_block *block; -+ enum vkd3d_result ret; -+ -+ sm6->current_block = parent; -+ -+ do -+ { -+ unsigned int abbrev_id = sm6_parser_read_bits(sm6, parent->abbrev_len); -+ -+ switch (abbrev_id) -+ { -+ case END_BLOCK: -+ sm6_parser_align_32(sm6); -+ return VKD3D_OK; -+ -+ case ENTER_SUBBLOCK: -+ if (parent->id != MODULE_BLOCK && parent->id != FUNCTION_BLOCK) -+ { -+ WARN("Invalid subblock parent id %u.\n", parent->id); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!vkd3d_array_reserve((void **)&parent->child_blocks, &parent->child_block_capacity, -+ max(reserve, parent->child_block_count + 1), sizeof(*parent->child_blocks)) -+ || !(block = vkd3d_calloc(1, sizeof(*block)))) -+ { -+ ERR("Failed to allocate block.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if ((ret = dxil_block_init(block, parent, sm6)) < 0) -+ { -+ vkd3d_free(block); -+ return ret; -+ } -+ -+ parent->child_blocks[parent->child_block_count++] = block; -+ sm6->current_block = parent; -+ break; -+ -+ case DEFINE_ABBREV: -+ if ((ret = sm6_parser_add_block_abbrev(sm6)) < 0) -+ return ret; -+ break; -+ -+ case UNABBREV_RECORD: -+ if ((ret = sm6_parser_read_unabbrev_record(sm6)) < 0) -+ { -+ WARN("Failed to read unabbreviated record.\n"); -+ return ret; -+ } -+ break; -+ -+ default: -+ if ((ret = sm6_parser_read_abbrev_record(sm6, abbrev_id - 4)) < 0) -+ { -+ WARN("Failed to read abbreviated record.\n"); -+ return ret; -+ } -+ break; -+ } -+ } while (!sm6->p.failed); -+ -+ return VKD3D_ERROR_INVALID_SHADER; -+} -+ -+static size_t sm6_parser_compute_global_abbrev_count_for_block_id(struct sm6_parser *sm6, -+ unsigned int block_id) -+{ -+ size_t i, count; -+ -+ for (i = 0, count = 0; i < sm6->abbrev_count; ++i) -+ count += sm6->abbrevs[i]->block_id == block_id; -+ -+ return count; -+} -+ -+static void dxil_block_destroy(struct dxil_block *block) -+{ -+ size_t i; -+ -+ for (i = 0; i < block->record_count; ++i) -+ vkd3d_free(block->records[i]); -+ vkd3d_free(block->records); 
-+ -+ for (i = 0; i < block->child_block_count; ++i) -+ { -+ dxil_block_destroy(block->child_blocks[i]); -+ vkd3d_free(block->child_blocks[i]); -+ } -+ vkd3d_free(block->child_blocks); -+ -+ block->records = NULL; -+ block->record_count = 0; -+ block->child_blocks = NULL; -+ block->child_block_count = 0; -+} -+ -+static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, -+ struct sm6_parser *sm6) -+{ -+ size_t i, abbrev_count = 0; -+ enum vkd3d_result ret; -+ -+ block->parent = parent; -+ block->level = parent ? parent->level + 1 : 0; -+ block->id = sm6_parser_read_vbr(sm6, 8); -+ block->abbrev_len = sm6_parser_read_vbr(sm6, 4); -+ sm6_parser_align_32(sm6); -+ block->length = sm6_parser_read_uint32(sm6); -+ block->start = sm6->ptr - sm6->start; -+ -+ if (sm6->p.failed) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if ((block->abbrev_count = sm6_parser_compute_global_abbrev_count_for_block_id(sm6, block->id))) -+ { -+ if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, -+ block->abbrev_count, sizeof(*block->abbrevs))) -+ { -+ ERR("Failed to allocate block abbreviations.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ for (i = 0; i < sm6->abbrev_count; ++i) -+ if (sm6->abbrevs[i]->block_id == block->id) -+ block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; -+ -+ assert(abbrev_count == block->abbrev_count); -+ } -+ -+ if ((ret = dxil_block_read(block, sm6)) < 0) -+ dxil_block_destroy(block); -+ -+ for (i = abbrev_count; i < block->abbrev_count; ++i) -+ vkd3d_free(block->abbrevs[i]); -+ vkd3d_free(block->abbrevs); -+ block->abbrevs = NULL; -+ block->abbrev_count = 0; -+ -+ return ret; -+} -+ -+static size_t dxil_block_compute_function_count(const struct dxil_block *root) -+{ -+ size_t i, count; -+ -+ for (i = 0, count = 0; i < root->child_block_count; ++i) -+ count += root->child_blocks[i]->id == FUNCTION_BLOCK; -+ -+ return count; -+} -+ -+static size_t 
dxil_block_compute_module_decl_count(const struct dxil_block *block) -+{ -+ size_t i, count; -+ -+ for (i = 0, count = 0; i < block->record_count; ++i) -+ count += block->records[i]->code == MODULE_CODE_FUNCTION; -+ return count; -+} -+ -+static size_t dxil_block_compute_constants_count(const struct dxil_block *block) -+{ -+ size_t i, count; -+ -+ for (i = 0, count = 0; i < block->record_count; ++i) -+ count += block->records[i]->code != CST_CODE_SETTYPE; -+ return count; -+} -+ -+static void dxil_global_abbrevs_cleanup(struct dxil_global_abbrev **abbrevs, size_t count) -+{ -+ size_t i; -+ -+ for (i = 0; i < count; ++i) -+ vkd3d_free(abbrevs[i]); -+ vkd3d_free(abbrevs); -+} -+ -+static const struct dxil_block *sm6_parser_get_level_one_block(const struct sm6_parser *sm6, -+ enum bitcode_block_id id, bool *is_unique) -+{ -+ const struct dxil_block *block, *found = NULL; -+ size_t i; -+ -+ for (i = 0, *is_unique = true; i < sm6->root_block.child_block_count; ++i) -+ { -+ block = sm6->root_block.child_blocks[i]; -+ if (block->id != id) -+ continue; -+ -+ if (!found) -+ found = block; -+ else -+ *is_unique = false; -+ } -+ -+ return found; -+} -+ -+static char *dxil_record_to_string(const struct dxil_record *record, unsigned int offset) -+{ -+ unsigned int i; -+ char *str; -+ -+ assert(offset <= record->operand_count); -+ if (!(str = vkd3d_calloc(record->operand_count - offset + 1, 1))) -+ return NULL; -+ -+ for (i = offset; i < record->operand_count; ++i) -+ str[i - offset] = record->operands[i]; -+ -+ return str; -+} -+ -+static bool dxil_record_validate_operand_min_count(const struct dxil_record *record, unsigned int min_count, -+ struct sm6_parser *sm6) -+{ -+ if (record->operand_count >= min_count) -+ return true; -+ -+ WARN("Invalid operand count %u for code %u.\n", record->operand_count, record->code); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "Invalid operand count %u for record code %u.", record->operand_count, 
record->code); -+ return false; -+} -+ -+static void dxil_record_validate_operand_max_count(const struct dxil_record *record, unsigned int max_count, -+ struct sm6_parser *sm6) -+{ -+ if (record->operand_count <= max_count) -+ return; -+ -+ WARN("Ignoring %u extra operands for code %u.\n", record->operand_count - max_count, record->code); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %u extra operands for record code %u.", record->operand_count - max_count, record->code); -+} -+ -+static bool dxil_record_validate_operand_count(const struct dxil_record *record, unsigned int min_count, -+ unsigned int max_count, struct sm6_parser *sm6) -+{ -+ dxil_record_validate_operand_max_count(record, max_count, sm6); -+ return dxil_record_validate_operand_min_count(record, min_count, sm6); -+} -+ -+static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) -+{ -+ const struct dxil_record *record; -+ size_t i, type_count, type_index; -+ const struct dxil_block *block; -+ char *struct_name = NULL; -+ unsigned int j, count; -+ struct sm6_type *type; -+ uint64_t type_id; -+ bool is_unique; -+ -+ sm6->p.location.line = 0; -+ sm6->p.location.column = 0; -+ -+ if (!(block = sm6_parser_get_level_one_block(sm6, TYPE_BLOCK, &is_unique))) -+ { -+ WARN("No type definitions found.\n"); -+ return VKD3D_OK; -+ } -+ if (!is_unique) -+ WARN("Ignoring invalid extra type table(s).\n"); -+ -+ sm6->p.location.line = block->id; -+ -+ type_count = 0; -+ for (i = 0; i < block->record_count; ++i) -+ type_count += block->records[i]->code != TYPE_CODE_NUMENTRY && block->records[i]->code != TYPE_CODE_STRUCT_NAME; -+ -+ /* The type array must not be relocated. 
*/ -+ if (!(sm6->types = vkd3d_calloc(type_count, sizeof(*sm6->types)))) -+ { -+ ERR("Failed to allocate type array.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ for (i = 0; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ record = block->records[i]; -+ -+ type = &sm6->types[sm6->type_count]; -+ type_index = sm6->type_count; -+ -+ switch (record->code) -+ { -+ case TYPE_CODE_ARRAY: -+ case TYPE_CODE_VECTOR: -+ if (!dxil_record_validate_operand_count(record, 2, 2, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ type->class = record->code == TYPE_CODE_ARRAY ? TYPE_CLASS_ARRAY : TYPE_CLASS_VECTOR; -+ -+ if (!(type->u.array.count = record->operands[0])) -+ { -+ TRACE("Setting unbounded for type %zu.\n", type_index); -+ type->u.array.count = UINT_MAX; -+ } -+ -+ if ((type_id = record->operands[1]) >= type_count) -+ { -+ WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.array.elem_type = &sm6->types[type_id]; -+ break; -+ -+ case TYPE_CODE_DOUBLE: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ type->class = TYPE_CLASS_FLOAT; -+ type->u.width = 64; -+ break; -+ -+ case TYPE_CODE_FLOAT: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ type->class = TYPE_CLASS_FLOAT; -+ type->u.width = 32; -+ break; -+ -+ case TYPE_CODE_FUNCTION: -+ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ if (record->operands[0]) -+ FIXME("Unhandled vararg function type %zu.\n", type_index); -+ -+ type->class = TYPE_CLASS_FUNCTION; -+ -+ if ((type_id = record->operands[1]) >= type_count) -+ { -+ WARN("Invalid return type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ count = record->operand_count - 2; -+ if (vkd3d_object_range_overflow(sizeof(type->u.function), count, sizeof(type->u.function->param_types[0])) -+ || !(type->u.function = 
vkd3d_malloc(offsetof(struct sm6_function_info, param_types[count])))) -+ { -+ ERR("Failed to allocate function parameter types.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ type->u.function->ret_type = &sm6->types[type_id]; -+ type->u.function->param_count = count; -+ for (j = 0; j < count; ++j) -+ { -+ if ((type_id = record->operands[j + 2]) >= type_count) -+ { -+ WARN("Invalid parameter type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ vkd3d_free(type->u.function); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.function->param_types[j] = &sm6->types[type_id]; -+ } -+ break; -+ -+ case TYPE_CODE_HALF: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ type->class = TYPE_CLASS_FLOAT; -+ type->u.width = 16; -+ break; -+ -+ case TYPE_CODE_INTEGER: -+ { -+ uint64_t width; -+ -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ type->class = TYPE_CLASS_INTEGER; -+ -+ switch ((width = record->operands[0])) -+ { -+ case 1: -+ case 8: -+ case 16: -+ case 32: -+ case 64: -+ break; -+ default: -+ WARN("Invalid integer width %"PRIu64" for type %zu.\n", width, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.width = width; -+ break; -+ } -+ -+ case TYPE_CODE_LABEL: -+ type->class = TYPE_CLASS_LABEL; -+ break; -+ -+ case TYPE_CODE_METADATA: -+ type->class = TYPE_CLASS_METADATA; -+ break; -+ -+ case TYPE_CODE_NUMENTRY: -+ continue; -+ -+ case TYPE_CODE_POINTER: -+ if (!dxil_record_validate_operand_count(record, 1, 2, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ type->class = TYPE_CLASS_POINTER; -+ -+ if ((type_id = record->operands[0]) >= type_count) -+ { -+ WARN("Invalid pointee type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.pointer.type = &sm6->types[type_id]; -+ type->u.pointer.addr_space = (record->operand_count > 1) ? 
record->operands[1] : ADDRESS_SPACE_DEFAULT; -+ break; -+ -+ case TYPE_CODE_STRUCT_ANON: -+ case TYPE_CODE_STRUCT_NAMED: -+ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ if (record->code == TYPE_CODE_STRUCT_NAMED && !struct_name) -+ { -+ WARN("Missing struct name before struct type %zu.\n", type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ type->class = TYPE_CLASS_STRUCT; -+ -+ count = record->operand_count - 1; -+ if (vkd3d_object_range_overflow(sizeof(type->u.struc), count, sizeof(type->u.struc->elem_types[0])) -+ || !(type->u.struc = vkd3d_malloc(offsetof(struct sm6_struct_info, elem_types[count])))) -+ { -+ ERR("Failed to allocate struct element types.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if (record->operands[0]) -+ FIXME("Ignoring struct packed attribute.\n"); -+ -+ type->u.struc->elem_count = count; -+ for (j = 0; j < count; ++j) -+ { -+ if ((type_id = record->operands[j + 1]) >= type_count) -+ { -+ WARN("Invalid contained type id %"PRIu64" for type %zu.\n", type_id, type_index); -+ vkd3d_free(type->u.struc); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ type->u.struc->elem_types[j] = &sm6->types[type_id]; -+ } -+ -+ if (record->code == TYPE_CODE_STRUCT_ANON) -+ { -+ type->u.struc->name = NULL; -+ break; -+ } -+ -+ type->u.struc->name = struct_name; -+ struct_name = NULL; -+ break; -+ -+ case TYPE_CODE_STRUCT_NAME: -+ if (!(struct_name = dxil_record_to_string(record, 0))) -+ { -+ ERR("Failed to allocate struct name.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ if (!struct_name[0]) -+ WARN("Struct name is empty for type %zu.\n", type_index); -+ continue; -+ -+ case TYPE_CODE_VOID: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ type->class = TYPE_CLASS_VOID; -+ break; -+ -+ default: -+ FIXME("Unhandled type %u at index %zu.\n", record->code, type_index); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ ++sm6->type_count; -+ } -+ -+ assert(sm6->type_count == 
type_count); -+ -+ if (struct_name) -+ { -+ WARN("Unused struct name %s.\n", struct_name); -+ vkd3d_free(struct_name); -+ } -+ -+ return VKD3D_OK; -+} -+ -+static inline bool sm6_type_is_void(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_VOID; -+} -+ -+static inline bool sm6_type_is_integer(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_INTEGER; -+} -+ -+static inline bool sm6_type_is_i8(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_INTEGER && type->u.width == 8; -+} -+ -+static inline bool sm6_type_is_i32(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_INTEGER && type->u.width == 32; -+} -+ -+static inline bool sm6_type_is_floating_point(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_FLOAT; -+} -+ -+static inline bool sm6_type_is_numeric(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_INTEGER || type->class == TYPE_CLASS_FLOAT; -+} -+ -+static inline bool sm6_type_is_pointer(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_POINTER; -+} -+ -+static bool sm6_type_is_numeric_aggregate(const struct sm6_type *type) -+{ -+ unsigned int i; -+ -+ switch (type->class) -+ { -+ case TYPE_CLASS_ARRAY: -+ case TYPE_CLASS_VECTOR: -+ return sm6_type_is_numeric(type->u.array.elem_type); -+ -+ case TYPE_CLASS_STRUCT: -+ /* Do not handle nested structs. Support can be added if they show up. 
*/ -+ for (i = 0; i < type->u.struc->elem_count; ++i) -+ if (!sm6_type_is_numeric(type->u.struc->elem_types[i])) -+ return false; -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+static inline bool sm6_type_is_struct(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_STRUCT; -+} -+ -+static inline bool sm6_type_is_function(const struct sm6_type *type) -+{ -+ return type->class == TYPE_CLASS_FUNCTION; -+} -+ -+static inline bool sm6_type_is_function_pointer(const struct sm6_type *type) -+{ -+ return sm6_type_is_pointer(type) && sm6_type_is_function(type->u.pointer.type); -+} -+ -+static inline bool sm6_type_is_handle(const struct sm6_type *type) -+{ -+ return sm6_type_is_struct(type) && !strcmp(type->u.struc->name, "dx.types.Handle"); -+} -+ -+static inline const struct sm6_type *sm6_type_get_element_type(const struct sm6_type *type) -+{ -+ return (type->class == TYPE_CLASS_ARRAY || type->class == TYPE_CLASS_VECTOR) ? type->u.array.elem_type : type; -+} -+ -+static const struct sm6_type *sm6_type_get_pointer_to_type(const struct sm6_type *type, -+ enum bitcode_address_space addr_space, struct sm6_parser *sm6) -+{ -+ size_t i, start = type - sm6->types; -+ const struct sm6_type *pointer_type; -+ -+ /* DXC seems usually to place the pointer type immediately after its pointee. */ -+ for (i = (start + 1) % sm6->type_count; i != start; i = (i + 1) % sm6->type_count) -+ { -+ pointer_type = &sm6->types[i]; -+ if (sm6_type_is_pointer(pointer_type) && pointer_type->u.pointer.type == type -+ && pointer_type->u.pointer.addr_space == addr_space) -+ return pointer_type; -+ } -+ -+ return NULL; -+} -+ -+/* Never returns null for elem_idx 0. 
*/ -+static const struct sm6_type *sm6_type_get_scalar_type(const struct sm6_type *type, unsigned int elem_idx) -+{ -+ switch (type->class) -+ { -+ case TYPE_CLASS_ARRAY: -+ case TYPE_CLASS_VECTOR: -+ if (elem_idx >= type->u.array.count) -+ return NULL; -+ return sm6_type_get_scalar_type(type->u.array.elem_type, 0); -+ -+ case TYPE_CLASS_POINTER: -+ return sm6_type_get_scalar_type(type->u.pointer.type, 0); -+ -+ case TYPE_CLASS_STRUCT: -+ if (elem_idx >= type->u.struc->elem_count) -+ return NULL; -+ return sm6_type_get_scalar_type(type->u.struc->elem_types[elem_idx], 0); -+ -+ default: -+ return type; -+ } -+} -+ -+static const struct sm6_type *sm6_parser_get_type(struct sm6_parser *sm6, uint64_t type_id) -+{ -+ if (type_id >= sm6->type_count) -+ { -+ WARN("Invalid type index %"PRIu64" at %zu.\n", type_id, sm6->value_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID, -+ "DXIL type id %"PRIu64" is invalid.", type_id); -+ return NULL; -+ } -+ return &sm6->types[type_id]; -+} -+ -+static int global_symbol_compare(const void *a, const void *b) -+{ -+ return vkd3d_u32_compare(((const struct sm6_symbol *)a)->id, ((const struct sm6_symbol *)b)->id); -+} -+ -+static enum vkd3d_result sm6_parser_symtab_init(struct sm6_parser *sm6) -+{ -+ const struct dxil_record *record; -+ const struct dxil_block *block; -+ struct sm6_symbol *symbol; -+ size_t i, count; -+ bool is_unique; -+ -+ sm6->p.location.line = 0; -+ sm6->p.location.column = 0; -+ -+ if (!(block = sm6_parser_get_level_one_block(sm6, VALUE_SYMTAB_BLOCK, &is_unique))) -+ { -+ /* There should always be at least one symbol: the name of the entry point function. 
*/ -+ WARN("No value symtab block found.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (!is_unique) -+ FIXME("Ignoring extra value symtab block(s).\n"); -+ -+ sm6->p.location.line = block->id; -+ -+ for (i = 0, count = 0; i < block->record_count; ++i) -+ count += block->records[i]->code == VST_CODE_ENTRY; -+ -+ if (!(sm6->global_symbols = vkd3d_calloc(count, sizeof(*sm6->global_symbols)))) -+ { -+ ERR("Failed to allocate global symbols.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ for (i = 0; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ record = block->records[i]; -+ -+ if (record->code != VST_CODE_ENTRY) -+ { -+ FIXME("Unhandled symtab code %u.\n", record->code); -+ continue; -+ } -+ if (!dxil_record_validate_operand_min_count(record, 1, sm6)) -+ continue; -+ -+ symbol = &sm6->global_symbols[sm6->global_symbol_count]; -+ symbol->id = record->operands[0]; -+ if (!(symbol->name = dxil_record_to_string(record, 1))) -+ { -+ ERR("Failed to allocate symbol name.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ ++sm6->global_symbol_count; -+ } -+ -+ sm6->p.location.column = block->record_count; -+ -+ qsort(sm6->global_symbols, sm6->global_symbol_count, sizeof(*sm6->global_symbols), global_symbol_compare); -+ for (i = 1; i < sm6->global_symbol_count; ++i) -+ { -+ if (sm6->global_symbols[i].id == sm6->global_symbols[i - 1].id) -+ { -+ WARN("Invalid duplicate symbol id %u.\n", sm6->global_symbols[i].id); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+static const char *sm6_parser_get_global_symbol_name(const struct sm6_parser *sm6, size_t id) -+{ -+ size_t i, start; -+ -+ /* id == array index is normally true */ -+ i = start = id % sm6->global_symbol_count; -+ do -+ { -+ if (sm6->global_symbols[i].id == id) -+ return sm6->global_symbols[i].name; -+ i = (i + 1) % sm6->global_symbol_count; -+ } while (i != start); -+ -+ return NULL; -+} -+ -+static unsigned int register_get_uint_value(const struct 
vkd3d_shader_register *reg) -+{ -+ if (!register_is_constant(reg) || !data_type_is_integer(reg->data_type)) -+ return UINT_MAX; -+ -+ if (reg->immconst_type == VKD3D_IMMCONST_VEC4) -+ WARN("Returning vec4.x.\n"); -+ -+ if (reg->type == VKD3DSPR_IMMCONST64) -+ { -+ if (reg->u.immconst_uint64[0] > UINT_MAX) -+ FIXME("Truncating 64-bit value.\n"); -+ return reg->u.immconst_uint64[0]; -+ } -+ -+ return reg->u.immconst_uint[0]; -+} -+ -+static inline bool sm6_value_is_function_dcl(const struct sm6_value *value) -+{ -+ return value->value_type == VALUE_TYPE_FUNCTION; -+} -+ -+static inline bool sm6_value_is_dx_intrinsic_dcl(const struct sm6_value *fn) -+{ -+ assert(sm6_value_is_function_dcl(fn)); -+ return fn->u.function.is_prototype && !strncmp(fn->u.function.name, "dx.op.", 6); -+} -+ -+static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_parser *sm6) -+{ -+ assert(sm6->value_count < sm6->value_capacity); -+ return &sm6->values[sm6->value_count]; -+} -+ -+static inline bool sm6_value_is_register(const struct sm6_value *value) -+{ -+ return value->value_type == VALUE_TYPE_REG; -+} -+ -+static inline bool sm6_value_is_constant(const struct sm6_value *value) -+{ -+ return sm6_value_is_register(value) && register_is_constant(&value->u.reg); -+} -+ -+static inline bool sm6_value_is_undef(const struct sm6_value *value) -+{ -+ return sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF; -+} -+ -+static inline unsigned int sm6_value_get_constant_uint(const struct sm6_value *value) -+{ -+ if (!sm6_value_is_constant(value)) -+ return UINT_MAX; -+ return register_get_uint_value(&value->u.reg); -+} -+ -+static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_shader_instruction *ins, -+ unsigned int count, struct sm6_parser *sm6) -+{ -+ struct vkd3d_shader_src_param *params = shader_parser_get_src_params(&sm6->p, count); -+ if (!params) -+ { -+ ERR("Failed to allocate src params.\n"); -+ 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory allocating instruction src paramaters."); -+ return NULL; -+ } -+ ins->src = params; -+ ins->src_count = count; -+ return params; -+} -+ -+static struct vkd3d_shader_dst_param *instruction_dst_params_alloc(struct vkd3d_shader_instruction *ins, -+ unsigned int count, struct sm6_parser *sm6) -+{ -+ struct vkd3d_shader_dst_param *params = shader_parser_get_dst_params(&sm6->p, count); -+ if (!params) -+ { -+ ERR("Failed to allocate dst params.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory allocating instruction dst paramaters."); -+ return NULL; -+ } -+ ins->dst = params; -+ ins->dst_count = count; -+ return params; -+} -+ -+static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type *type) -+{ -+ if (type->class == TYPE_CLASS_INTEGER) -+ { -+ switch (type->u.width) -+ { -+ case 8: -+ return VKD3D_DATA_UINT8; -+ case 32: -+ return VKD3D_DATA_UINT; -+ default: -+ FIXME("Unhandled width %u.\n", type->u.width); -+ return VKD3D_DATA_UINT; -+ } -+ } -+ else if (type->class == TYPE_CLASS_FLOAT) -+ { -+ switch (type->u.width) -+ { -+ case 32: -+ return VKD3D_DATA_FLOAT; -+ case 64: -+ return VKD3D_DATA_DOUBLE; -+ default: -+ FIXME("Unhandled width %u.\n", type->u.width); -+ return VKD3D_DATA_FLOAT; -+ } -+ } -+ -+ FIXME("Unhandled type %u.\n", type->class); -+ return VKD3D_DATA_UINT; -+} -+ -+static inline void dst_param_init_scalar(struct vkd3d_shader_dst_param *param, unsigned int component_idx) -+{ -+ param->write_mask = 1u << component_idx; -+ param->modifiers = 0; -+ param->shift = 0; -+} -+ -+static inline void src_param_init(struct vkd3d_shader_src_param *param) -+{ -+ param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ param->modifiers = VKD3DSPSM_NONE; -+} -+ -+static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src) -+{ -+ src_param_init(param); -+ 
param->reg = src->u.reg; -+} -+ -+static void register_address_init(struct vkd3d_shader_register *reg, const struct sm6_value *address, -+ unsigned int idx, struct sm6_parser *sm6) -+{ -+ assert(idx < ARRAY_SIZE(reg->idx)); -+ if (sm6_value_is_constant(address)) -+ { -+ reg->idx[idx].offset = sm6_value_get_constant_uint(address); -+ } -+ else if (sm6_value_is_undef(address)) -+ { -+ reg->idx[idx].offset = 0; -+ } -+ else -+ { -+ struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&sm6->p, 1); -+ if (rel_addr) -+ src_param_init_from_value(rel_addr, address); -+ reg->idx[idx].offset = 0; -+ reg->idx[idx].rel_addr = rel_addr; -+ } -+} -+ -+/* Recurse through the block tree while maintaining a current value count. The current -+ * count is the sum of the global count plus all declarations within the current function. -+ * Store into value_capacity the highest count seen. */ -+static size_t sm6_parser_compute_max_value_count(struct sm6_parser *sm6, -+ const struct dxil_block *block, size_t value_count) -+{ -+ size_t i, old_value_count = value_count; -+ -+ if (block->id == MODULE_BLOCK) -+ value_count = size_add_with_overflow_check(value_count, dxil_block_compute_module_decl_count(block)); -+ -+ for (i = 0; i < block->child_block_count; ++i) -+ value_count = sm6_parser_compute_max_value_count(sm6, block->child_blocks[i], value_count); -+ -+ switch (block->id) -+ { -+ case CONSTANTS_BLOCK: -+ /* Function local constants are contained in a child block of the function block. */ -+ value_count = size_add_with_overflow_check(value_count, dxil_block_compute_constants_count(block)); -+ break; -+ case FUNCTION_BLOCK: -+ /* A function must start with a block count, which emits no value. This formula is likely to -+ * overestimate the value count somewhat, but this should be no problem. 
*/ -+ value_count = size_add_with_overflow_check(value_count, max(block->record_count, 1u) - 1); -+ sm6->value_capacity = max(sm6->value_capacity, value_count); -+ sm6->functions[sm6->function_count].value_count = value_count; -+ /* The value count returns to its previous value after handling a function. */ -+ if (value_count < SIZE_MAX) -+ value_count = old_value_count; -+ break; -+ default: -+ break; -+ } -+ -+ return value_count; -+} -+ -+static size_t sm6_parser_get_value_index(struct sm6_parser *sm6, uint64_t idx) -+{ -+ size_t i; -+ -+ /* The value relative index is 32 bits. */ -+ if (idx > UINT32_MAX) -+ WARN("Ignoring upper 32 bits of relative index.\n"); -+ i = (uint32_t)sm6->value_count - (uint32_t)idx; -+ -+ /* This may underflow to produce a forward reference, but it must not exceeed the final value count. */ -+ if (i >= sm6->cur_max_value) -+ { -+ WARN("Invalid value index %"PRIx64" at %zu.\n", idx, sm6->value_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid value relative index %u.", (unsigned int)idx); -+ return SIZE_MAX; -+ } -+ if (i == sm6->value_count) -+ { -+ WARN("Invalid value self-reference at %zu.\n", sm6->value_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Invalid value self-reference."); -+ return SIZE_MAX; -+ } -+ -+ return i; -+} -+ -+static size_t sm6_parser_get_value_idx_by_ref(struct sm6_parser *sm6, const struct dxil_record *record, -+ const struct sm6_type *fwd_type, unsigned int *rec_idx) -+{ -+ unsigned int idx; -+ uint64_t val_ref; -+ size_t operand; -+ -+ idx = *rec_idx; -+ if (!dxil_record_validate_operand_min_count(record, idx + 1, sm6)) -+ return SIZE_MAX; -+ val_ref = record->operands[idx++]; -+ -+ operand = sm6_parser_get_value_index(sm6, val_ref); -+ if (operand == SIZE_MAX) -+ return SIZE_MAX; -+ -+ if (operand >= sm6->value_count) -+ { -+ if (!fwd_type) -+ { -+ /* Forward references are followed by a type id unless an earlier 
operand set the type, -+ * or it is contained in a function declaration. */ -+ if (!dxil_record_validate_operand_min_count(record, idx + 1, sm6)) -+ return SIZE_MAX; -+ if (!(fwd_type = sm6_parser_get_type(sm6, record->operands[idx++]))) -+ return SIZE_MAX; -+ } -+ FIXME("Forward value references are not supported yet.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Unsupported value forward reference."); -+ return SIZE_MAX; -+ } -+ *rec_idx = idx; -+ -+ return operand; -+} -+ -+static const struct sm6_value *sm6_parser_get_value_by_ref(struct sm6_parser *sm6, -+ const struct dxil_record *record, const struct sm6_type *type, unsigned int *rec_idx) -+{ -+ size_t operand = sm6_parser_get_value_idx_by_ref(sm6, record, type, rec_idx); -+ return operand == SIZE_MAX ? NULL : &sm6->values[operand]; -+} -+ -+static bool sm6_parser_declare_function(struct sm6_parser *sm6, const struct dxil_record *record) -+{ -+ const unsigned int max_count = 15; -+ const struct sm6_type *ret_type; -+ struct sm6_value *fn; -+ unsigned int i, j; -+ -+ if (!dxil_record_validate_operand_count(record, 8, max_count, sm6)) -+ return false; -+ -+ fn = sm6_parser_get_current_value(sm6); -+ fn->value_type = VALUE_TYPE_FUNCTION; -+ if (!(fn->u.function.name = sm6_parser_get_global_symbol_name(sm6, sm6->value_count))) -+ { -+ WARN("Missing symbol name for function %zu.\n", sm6->value_count); -+ fn->u.function.name = ""; -+ } -+ -+ if (!(fn->type = sm6_parser_get_type(sm6, record->operands[0]))) -+ return false; -+ if (!sm6_type_is_function(fn->type)) -+ { -+ WARN("Type is not a function.\n"); -+ return false; -+ } -+ ret_type = fn->type->u.function->ret_type; -+ -+ if (!(fn->type = sm6_type_get_pointer_to_type(fn->type, ADDRESS_SPACE_DEFAULT, sm6))) -+ { -+ WARN("Failed to get pointer type for type %u.\n", fn->type->class); -+ return false; -+ } -+ -+ if (record->operands[1]) -+ WARN("Ignoring calling convention %#"PRIx64".\n", record->operands[1]); -+ -+ 
fn->u.function.is_prototype = !!record->operands[2]; -+ -+ if (record->operands[3]) -+ WARN("Ignoring linkage %#"PRIx64".\n", record->operands[3]); -+ -+ if (record->operands[4] > UINT_MAX) -+ WARN("Invalid attributes id %#"PRIx64".\n", record->operands[4]); -+ /* 1-based index. */ -+ if ((fn->u.function.attribs_id = record->operands[4])) -+ TRACE("Ignoring function attributes.\n"); -+ -+ /* These always seem to be zero. */ -+ for (i = 5, j = 0; i < min(record->operand_count, max_count); ++i) -+ j += !!record->operands[i]; -+ if (j) -+ WARN("Ignoring %u operands.\n", j); -+ -+ if (sm6_value_is_dx_intrinsic_dcl(fn) && !sm6_type_is_void(ret_type) && !sm6_type_is_numeric(ret_type) -+ && !sm6_type_is_numeric_aggregate(ret_type) && !sm6_type_is_handle(ret_type)) -+ { -+ WARN("Unexpected return type for dx intrinsic function '%s'.\n", fn->u.function.name); -+ } -+ -+ ++sm6->value_count; -+ -+ return true; -+} -+ -+static inline uint64_t decode_rotated_signed_value(uint64_t value) -+{ -+ if (value != 1) -+ { -+ bool neg = value & 1; -+ value >>= 1; -+ return neg ? 
-value : value; -+ } -+ return value << 63; -+} -+ -+static inline float bitcast_uint64_to_float(uint64_t value) -+{ -+ union -+ { -+ uint32_t uint32_value; -+ float float_value; -+ } u; -+ -+ u.uint32_value = value; -+ return u.float_value; -+} -+ -+static inline double bitcast_uint64_to_double(uint64_t value) -+{ -+ union -+ { -+ uint64_t uint64_value; -+ double double_value; -+ } u; -+ -+ u.uint64_value = value; -+ return u.double_value; -+} -+ -+static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) -+{ -+ enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; -+ const struct sm6_type *type, *elem_type; -+ enum vkd3d_data_type reg_data_type; -+ const struct dxil_record *record; -+ struct sm6_value *dst; -+ size_t i, value_idx; -+ uint64_t value; -+ -+ for (i = 0, type = NULL; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ record = block->records[i]; -+ value_idx = sm6->value_count; -+ -+ if (record->code == CST_CODE_SETTYPE) -+ { -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if (!(type = sm6_parser_get_type(sm6, record->operands[0]))) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ elem_type = sm6_type_get_element_type(type); -+ if (sm6_type_is_numeric(elem_type)) -+ { -+ reg_data_type = vkd3d_data_type_from_sm6_type(elem_type); -+ reg_type = elem_type->u.width > 32 ? 
VKD3DSPR_IMMCONST64 : VKD3DSPR_IMMCONST; -+ } -+ else -+ { -+ reg_data_type = VKD3D_DATA_UNUSED; -+ reg_type = VKD3DSPR_INVALID; -+ } -+ -+ if (i == block->record_count - 1) -+ WARN("Unused SETTYPE record.\n"); -+ -+ continue; -+ } -+ -+ if (!type) -+ { -+ WARN("Constant record %zu has no type.\n", value_idx); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ dst = sm6_parser_get_current_value(sm6); -+ dst->type = type; -+ dst->value_type = VALUE_TYPE_REG; -+ dst->u.reg.type = reg_type; -+ dst->u.reg.immconst_type = VKD3D_IMMCONST_SCALAR; -+ dst->u.reg.data_type = reg_data_type; -+ -+ switch (record->code) -+ { -+ case CST_CODE_NULL: -+ /* Register constant data is already zero-filled. */ -+ break; -+ -+ case CST_CODE_INTEGER: -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if (!sm6_type_is_integer(type)) -+ { -+ WARN("Invalid integer of non-integer type %u at constant idx %zu.\n", type->class, value_idx); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ value = decode_rotated_signed_value(record->operands[0]); -+ if (type->u.width <= 32) -+ dst->u.reg.u.immconst_uint[0] = value & ((1ull << type->u.width) - 1); -+ else -+ dst->u.reg.u.immconst_uint64[0] = value; -+ -+ break; -+ -+ case CST_CODE_FLOAT: -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ -+ if (!sm6_type_is_floating_point(type)) -+ { -+ WARN("Invalid float of non-fp type %u at constant idx %zu.\n", type->class, value_idx); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (type->u.width == 16) -+ FIXME("Half float type is not supported yet.\n"); -+ else if (type->u.width == 32) -+ dst->u.reg.u.immconst_float[0] = bitcast_uint64_to_float(record->operands[0]); -+ else if (type->u.width == 64) -+ dst->u.reg.u.immconst_double[0] = bitcast_uint64_to_double(record->operands[0]); -+ else -+ vkd3d_unreachable(); -+ -+ break; -+ -+ case CST_CODE_DATA: -+ WARN("Unhandled constant array.\n"); -+ 
break; -+ -+ case CST_CODE_UNDEF: -+ dxil_record_validate_operand_max_count(record, 0, sm6); -+ dst->u.reg.type = VKD3DSPR_UNDEF; -+ /* Mark as explicitly undefined, not the result of a missing constant code or instruction. */ -+ dst->is_undefined = true; -+ break; -+ -+ default: -+ FIXME("Unhandled constant code %u.\n", record->code); -+ dst->u.reg.type = VKD3DSPR_UNDEF; -+ break; -+ } -+ -+ ++sm6->value_count; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static struct vkd3d_shader_instruction *sm6_parser_require_space(struct sm6_parser *sm6, size_t extra) -+{ -+ if (!shader_instruction_array_reserve(&sm6->p.instructions, sm6->p.instructions.count + extra)) -+ { -+ ERR("Failed to allocate instruction.\n"); -+ return NULL; -+ } -+ return &sm6->p.instructions.elements[sm6->p.instructions.count]; -+} -+ -+/* Space should be reserved before calling this. It is intended to require no checking of the returned pointer. */ -+static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_parser *sm6, -+ enum vkd3d_shader_opcode handler_idx) -+{ -+ struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); -+ assert(ins); -+ shader_instruction_init(ins, handler_idx); -+ ++sm6->p.instructions.count; -+ return ins; -+} -+ -+static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) -+{ -+ const struct dxil_block *block = &sm6->root_block; -+ const struct dxil_record *record; -+ uint64_t version; -+ size_t i; -+ -+ sm6->p.location.line = block->id; -+ sm6->p.location.column = 0; -+ -+ for (i = 0; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ record = block->records[i]; -+ switch (record->code) -+ { -+ case MODULE_CODE_FUNCTION: -+ if (!sm6_parser_declare_function(sm6, record)) -+ { -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL, -+ "A DXIL function declaration is invalid."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ break; -+ -+ case MODULE_CODE_GLOBALVAR: -+ FIXME("Global 
variables are not implemented yet.\n"); -+ break; -+ -+ case MODULE_CODE_VERSION: -+ if (!dxil_record_validate_operand_count(record, 1, 1, sm6)) -+ return VKD3D_ERROR_INVALID_SHADER; -+ if ((version = record->operands[0]) != 1) -+ { -+ FIXME("Unsupported format version %#"PRIx64".\n", version); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT, -+ "Bitcode format version %#"PRIx64" is unsupported.", version); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ break; -+ -+ default: -+ break; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+static void dst_param_io_init(struct vkd3d_shader_dst_param *param, -+ const struct signature_element *e, enum vkd3d_shader_register_type reg_type) -+{ -+ enum vkd3d_shader_component_type component_type; -+ -+ param->write_mask = e->mask; -+ param->modifiers = 0; -+ param->shift = 0; -+ /* DXIL types do not have signedness. Load signed elements as unsigned. */ -+ component_type = e->component_type == VKD3D_SHADER_COMPONENT_INT ? 
VKD3D_SHADER_COMPONENT_UINT : e->component_type; -+ shader_register_init(¶m->reg, reg_type, vkd3d_data_type_from_component_type(component_type), 0); -+} -+ -+static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shader_signature *s, -+ enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) -+{ -+ struct vkd3d_shader_dst_param *param; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < s->element_count; ++i) -+ { -+ e = &s->elements[i]; -+ -+ param = ¶ms[i]; -+ dst_param_io_init(param, e, reg_type); -+ param->reg.idx[0].offset = i; -+ param->reg.idx_count = 1; -+ } -+} -+ -+static void sm6_parser_emit_signature(struct sm6_parser *sm6, const struct shader_signature *s, -+ enum vkd3d_shader_opcode handler_idx, enum vkd3d_shader_opcode siv_handler_idx, -+ struct vkd3d_shader_dst_param *params) -+{ -+ struct vkd3d_shader_instruction *ins; -+ struct vkd3d_shader_dst_param *param; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < s->element_count; ++i) -+ { -+ e = &s->elements[i]; -+ -+ /* Do not check e->used_mask because in some cases it is zero for used elements. -+ * TODO: scan ahead for used I/O elements. */ -+ -+ if (e->sysval_semantic != VKD3D_SHADER_SV_NONE && e->sysval_semantic != VKD3D_SHADER_SV_TARGET) -+ { -+ ins = sm6_parser_add_instruction(sm6, siv_handler_idx); -+ param = &ins->declaration.register_semantic.reg; -+ ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); -+ } -+ else -+ { -+ ins = sm6_parser_add_instruction(sm6, handler_idx); -+ param = &ins->declaration.dst; -+ } -+ -+ *param = params[i]; -+ } -+} -+ -+static void sm6_parser_init_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) -+{ -+ sm6_parser_init_signature(sm6, output_signature, -+ (sm6->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL) ? 
VKD3DSPR_COLOROUT : VKD3DSPR_OUTPUT, -+ sm6->output_params); -+} -+ -+static void sm6_parser_emit_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) -+{ -+ sm6_parser_emit_signature(sm6, output_signature, VKD3DSIH_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT_SIV, sm6->output_params); -+} -+ -+static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) -+{ -+ size_t i, count = sm6->function_count; -+ -+ for (i = 0; i < sm6->value_count; ++i) -+ { -+ if (sm6_type_is_function_pointer(sm6->values[i].type) && !sm6->values[i].u.function.is_prototype && !count--) -+ break; -+ } -+ if (i == sm6->value_count) -+ return NULL; -+ -+ ++sm6->function_count; -+ return &sm6->values[i]; -+} -+ -+static struct sm6_block *sm6_block_create() -+{ -+ struct sm6_block *block = vkd3d_calloc(1, sizeof(*block)); -+ return block; -+} -+ -+static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, struct sm6_block *code_block, -+ enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct vkd3d_shader_instruction *ins) -+{ -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_dst_param *dst_param; -+ const struct shader_signature *signature; -+ unsigned int row_index, column_index; -+ const struct signature_element *e; -+ const struct sm6_value *value; -+ -+ row_index = sm6_value_get_constant_uint(operands[0]); -+ column_index = sm6_value_get_constant_uint(operands[2]); -+ -+ signature = &sm6->p.shader_desc.output_signature; -+ if (row_index >= signature->element_count) -+ { -+ WARN("Invalid row index %u.\n", row_index); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid output row index %u.", row_index); -+ return; -+ } -+ e = &signature->elements[row_index]; -+ -+ if (column_index >= VKD3D_VEC4_SIZE) -+ { -+ WARN("Invalid column index %u.\n", column_index); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid output column 
index %u.", column_index); -+ return; -+ } -+ -+ value = operands[3]; -+ if (!sm6_value_is_register(value)) -+ { -+ WARN("Source value is not a register.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Expected store operation source to be a register."); -+ return; -+ } -+ -+ shader_instruction_init(ins, VKD3DSIH_MOV); -+ -+ if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6))) -+ return; -+ dst_param_init_scalar(dst_param, column_index); -+ dst_param->reg = sm6->output_params[row_index].reg; -+ if (e->register_count > 1) -+ register_address_init(&dst_param->reg, operands[1], 0, sm6); -+ -+ if ((src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ src_param_init_from_value(src_param, value); -+} -+ -+struct sm6_dx_opcode_info -+{ -+ const char ret_type; -+ const char *operand_info; -+ void (*handler)(struct sm6_parser *, struct sm6_block *, enum dx_intrinsic_opcode, -+ const struct sm6_value **, struct vkd3d_shader_instruction *); -+}; -+ -+/* -+ 8 -> int8 -+ i -> int32 -+ v -> void -+ o -> overloaded -+ */ -+static const struct sm6_dx_opcode_info sm6_dx_op_table[] = -+{ -+ [DX_STORE_OUTPUT ] = {'v', "ii8o", sm6_parser_emit_dx_store_output}, -+}; -+ -+static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_type *type, char info_type) -+{ -+ switch (info_type) -+ { -+ case 0: -+ FIXME("Invalid operand count.\n"); -+ return false; -+ case '8': -+ return sm6_type_is_i8(type); -+ case 'i': -+ return sm6_type_is_i32(type); -+ case 'v': -+ return !type; -+ case 'o': -+ /* TODO: some type checking may be possible */ -+ return true; -+ default: -+ FIXME("Unhandled operand code '%c'.\n", info_type); -+ return false; -+ } -+} -+ -+static bool sm6_parser_validate_dx_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const char *name, -+ const struct sm6_value **operands, unsigned int operand_count, struct sm6_value *dst) -+{ -+ const struct sm6_dx_opcode_info *info; -+ unsigned int i; -+ 
-+ info = &sm6_dx_op_table[op]; -+ -+ if (!sm6_parser_validate_operand_type(sm6, dst->type, info->ret_type)) -+ { -+ WARN("Failed to validate return type for dx intrinsic id %u, '%s'.\n", op, name); -+ /* Return type validation failure is not so critical. We only need to set -+ * a data type for the SSA result. */ -+ } -+ -+ for (i = 0; i < operand_count; ++i) -+ { -+ const struct sm6_value *value = operands[i]; -+ if (!sm6_value_is_register(value) || !sm6_parser_validate_operand_type(sm6, value->type, info->operand_info[i])) -+ { -+ WARN("Failed to validate operand %u for dx intrinsic id %u, '%s'.\n", i + 1, op, name); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Operand %u for call to dx intrinsic function '%s' is invalid.", i + 1, name); -+ return false; -+ } -+ } -+ if (info->operand_info[operand_count]) -+ { -+ WARN("Missing operands for dx intrinsic id %u, '%s'.\n", op, name); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "Call to dx intrinsic function '%s' has missing operands.", name); -+ return false; -+ } -+ -+ return true; -+} -+ -+static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shader_instruction *ins, -+ struct sm6_value *dst) -+{ -+ const struct sm6_type *type; -+ -+ ins->handler_idx = VKD3DSIH_NOP; -+ -+ if (!dst->type) -+ return; -+ -+ type = sm6_type_get_scalar_type(dst->type, 0); -+ shader_register_init(&dst->u.reg, VKD3DSPR_UNDEF, vkd3d_data_type_from_sm6_type(type), 0); -+ /* dst->is_undefined is not set here because it flags only explicitly undefined values. 
*/ -+} -+ -+static void sm6_parser_decode_dx_op(struct sm6_parser *sm6, struct sm6_block *code_block, enum dx_intrinsic_opcode op, -+ const char *name, const struct sm6_value **operands, unsigned int operand_count, -+ struct vkd3d_shader_instruction *ins, struct sm6_value *dst) -+{ -+ if (op >= ARRAY_SIZE(sm6_dx_op_table) || !sm6_dx_op_table[op].operand_info) -+ { -+ FIXME("Unhandled dx intrinsic function id %u, '%s'.\n", op, name); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNHANDLED_INTRINSIC, -+ "Call to intrinsic function %s is unhandled.", name); -+ sm6_parser_emit_unhandled(sm6, ins, dst); -+ return; -+ } -+ -+ if (sm6_parser_validate_dx_op(sm6, op, name, operands, operand_count, dst)) -+ sm6_dx_op_table[op].handler(sm6, code_block, op, operands, ins); -+ else -+ sm6_parser_emit_unhandled(sm6, ins, dst); -+} -+ -+static void sm6_parser_emit_call(struct sm6_parser *sm6, const struct dxil_record *record, -+ struct sm6_block *code_block, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) -+{ -+ const struct sm6_value *operands[DXIL_OP_MAX_OPERANDS]; -+ const struct sm6_value *fn_value, *op_value; -+ unsigned int i = 1, j, operand_count; -+ const struct sm6_type *type = NULL; -+ uint64_t call_conv; -+ -+ if (!dxil_record_validate_operand_min_count(record, 2, sm6)) -+ return; -+ -+ /* TODO: load the 1-based attributes index from record->operands[0] and validate against attribute count. 
*/ -+ -+ if ((call_conv = record->operands[i++]) & CALL_CONV_FLAG_EXPLICIT_TYPE) -+ type = sm6_parser_get_type(sm6, record->operands[i++]); -+ if (call_conv &= ~CALL_CONV_FLAG_EXPLICIT_TYPE) -+ WARN("Ignoring calling convention %#"PRIx64".\n", call_conv); -+ -+ if (!(fn_value = sm6_parser_get_value_by_ref(sm6, record, NULL, &i))) -+ return; -+ if (!sm6_value_is_function_dcl(fn_value)) -+ { -+ WARN("Function target value is not a function declaration.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Function call target value is not a function declaration."); -+ return; -+ } -+ -+ if (type && type != fn_value->type->u.pointer.type) -+ WARN("Explicit call type does not match function type.\n"); -+ type = fn_value->type->u.pointer.type; -+ -+ if (!sm6_type_is_void(type->u.function->ret_type)) -+ dst->type = type->u.function->ret_type; -+ -+ operand_count = type->u.function->param_count; -+ if (operand_count > ARRAY_SIZE(operands)) -+ { -+ WARN("Ignoring %zu operands.\n", operand_count - ARRAY_SIZE(operands)); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %zu operands for function call.", operand_count - ARRAY_SIZE(operands)); -+ operand_count = ARRAY_SIZE(operands); -+ } -+ -+ for (j = 0; j < operand_count; ++j) -+ { -+ if (!(operands[j] = sm6_parser_get_value_by_ref(sm6, record, type->u.function->param_types[j], &i))) -+ return; -+ } -+ if ((j = record->operand_count - i)) -+ { -+ WARN("Ignoring %u operands beyond the function parameter list.\n", j); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, -+ "Ignoring %u function call operands beyond the parameter list.", j); -+ } -+ -+ if (!fn_value->u.function.is_prototype) -+ { -+ FIXME("Unhandled call to local function.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Call to a local function is unsupported."); -+ return; -+ } -+ if 
(!sm6_value_is_dx_intrinsic_dcl(fn_value)) -+ WARN("External function is not a dx intrinsic.\n"); -+ -+ if (!operand_count) -+ { -+ WARN("Missing dx intrinsic function id.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT, -+ "The id for a dx intrinsic function is missing."); -+ return; -+ } -+ -+ op_value = operands[0]; -+ if (!sm6_value_is_constant(op_value) || !sm6_type_is_integer(op_value->type)) -+ { -+ WARN("dx intrinsic function id is not a constant int.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Expected a constant integer dx intrinsic function id."); -+ return; -+ } -+ sm6_parser_decode_dx_op(sm6, code_block, register_get_uint_value(&op_value->u.reg), -+ fn_value->u.function.name, &operands[1], operand_count - 1, ins, dst); -+} -+ -+static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record *record, -+ struct sm6_block *code_block, struct vkd3d_shader_instruction *ins) -+{ -+ if (!dxil_record_validate_operand_count(record, 0, 1, sm6)) -+ return; -+ -+ if (record->operand_count) -+ FIXME("Non-void return is not implemented.\n"); -+ -+ ins->handler_idx = VKD3DSIH_NOP; -+} -+ -+static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const struct dxil_block *block, -+ struct sm6_function *function) -+{ -+ struct vkd3d_shader_instruction *ins; -+ const struct dxil_record *record; -+ bool ret_found, is_terminator; -+ struct sm6_block *code_block; -+ struct sm6_value *dst; -+ size_t i, block_idx; -+ -+ if (sm6->function_count) -+ { -+ FIXME("Multiple functions are not supported yet.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (!(function->declaration = sm6_parser_next_function_definition(sm6))) -+ { -+ WARN("Failed to find definition to match function body.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (block->record_count < 2) -+ { -+ /* It should contain at least a block count and a RET instruction. 
*/ -+ WARN("Invalid function block record count %zu.\n", block->record_count); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (block->records[0]->code != FUNC_CODE_DECLAREBLOCKS || !block->records[0]->operand_count -+ || block->records[0]->operands[0] > UINT_MAX) -+ { -+ WARN("Block count declaration not found or invalid.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!(function->block_count = block->records[0]->operands[0])) -+ { -+ WARN("Function contains no blocks.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (function->block_count > 1) -+ { -+ FIXME("Branched shaders are not supported yet.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (!(function->blocks[0] = sm6_block_create())) -+ { -+ ERR("Failed to allocate code block.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ code_block = function->blocks[0]; -+ -+ sm6->cur_max_value = function->value_count; -+ -+ for (i = 1, block_idx = 0, ret_found = false; i < block->record_count; ++i) -+ { -+ sm6->p.location.column = i; -+ -+ if (!code_block) -+ { -+ WARN("Invalid block count %zu.\n", function->block_count); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, -+ "Invalid block count %zu.", function->block_count); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ /* block->record_count - 1 is the instruction count, but some instructions -+ * can emit >1 IR instruction, so extra may be used. 
*/ -+ if (!vkd3d_array_reserve((void **)&code_block->instructions, &code_block->instruction_capacity, -+ max(code_block->instruction_count + 1, block->record_count), sizeof(*code_block->instructions))) -+ { -+ ERR("Failed to allocate instructions.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ ins = &code_block->instructions[code_block->instruction_count]; -+ ins->handler_idx = VKD3DSIH_INVALID; -+ -+ dst = sm6_parser_get_current_value(sm6); -+ dst->type = NULL; -+ dst->value_type = VALUE_TYPE_REG; -+ is_terminator = false; -+ -+ record = block->records[i]; -+ switch (record->code) -+ { -+ case FUNC_CODE_INST_CALL: -+ sm6_parser_emit_call(sm6, record, code_block, ins, dst); -+ break; -+ case FUNC_CODE_INST_RET: -+ sm6_parser_emit_ret(sm6, record, code_block, ins); -+ is_terminator = true; -+ ret_found = true; -+ break; -+ default: -+ FIXME("Unhandled dxil instruction %u.\n", record->code); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (sm6->p.failed) -+ return VKD3D_ERROR; -+ assert(ins->handler_idx != VKD3DSIH_INVALID); -+ -+ if (is_terminator) -+ { -+ ++block_idx; -+ code_block = (block_idx < function->block_count) ? 
function->blocks[block_idx] : NULL; -+ } -+ if (code_block) -+ code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; -+ else -+ assert(ins->handler_idx == VKD3DSIH_NOP); -+ -+ sm6->value_count += !!dst->type; -+ } -+ -+ if (!ret_found) -+ { -+ WARN("Function contains no RET instruction.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static bool sm6_block_emit_instructions(struct sm6_block *block, struct sm6_parser *sm6) -+{ -+ struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, block->instruction_count + 1); -+ -+ if (!ins) -+ return false; -+ -+ memcpy(ins, block->instructions, block->instruction_count * sizeof(*block->instructions)); -+ sm6->p.instructions.count += block->instruction_count; -+ -+ sm6_parser_add_instruction(sm6, VKD3DSIH_RET); -+ -+ return true; -+} -+ -+static enum vkd3d_result sm6_parser_module_init(struct sm6_parser *sm6, const struct dxil_block *block, -+ unsigned int level) -+{ -+ size_t i, old_value_count = sm6->value_count; -+ struct sm6_function *function; -+ enum vkd3d_result ret; -+ -+ for (i = 0; i < block->child_block_count; ++i) -+ { -+ if ((ret = sm6_parser_module_init(sm6, block->child_blocks[i], level + 1)) < 0) -+ return ret; -+ } -+ -+ sm6->p.location.line = block->id; -+ sm6->p.location.column = 0; -+ -+ switch (block->id) -+ { -+ case CONSTANTS_BLOCK: -+ function = &sm6->functions[sm6->function_count]; -+ sm6->cur_max_value = function->value_count; -+ return sm6_parser_constants_init(sm6, block); -+ -+ case FUNCTION_BLOCK: -+ function = &sm6->functions[sm6->function_count]; -+ if ((ret = sm6_parser_function_init(sm6, block, function)) < 0) -+ return ret; -+ /* The value index returns to its previous value after handling a function. It's usually nonzero -+ * at the start because of global constants/variables/function declarations. Function constants -+ * occur in a child block, so value_count is already saved before they are emitted. 
*/ -+ memset(&sm6->values[old_value_count], 0, (sm6->value_count - old_value_count) * sizeof(*sm6->values)); -+ sm6->value_count = old_value_count; -+ break; -+ -+ case BLOCKINFO_BLOCK: -+ case MODULE_BLOCK: -+ case PARAMATTR_BLOCK: -+ case PARAMATTR_GROUP_BLOCK: -+ case VALUE_SYMTAB_BLOCK: -+ case METADATA_BLOCK: -+ case METADATA_ATTACHMENT_BLOCK: -+ case TYPE_BLOCK: -+ break; -+ -+ default: -+ FIXME("Unhandled block id %u.\n", block->id); -+ break; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static void sm6_type_table_cleanup(struct sm6_type *types, size_t count) -+{ -+ size_t i; -+ -+ if (!types) -+ return; -+ -+ for (i = 0; i < count; ++i) -+ { -+ switch (types[i].class) -+ { -+ case TYPE_CLASS_STRUCT: -+ vkd3d_free((void *)types[i].u.struc->name); -+ vkd3d_free(types[i].u.struc); -+ break; -+ case TYPE_CLASS_FUNCTION: -+ vkd3d_free(types[i].u.function); -+ break; -+ default: -+ break; -+ } -+ } -+ -+ vkd3d_free(types); -+} -+ -+static void sm6_symtab_cleanup(struct sm6_symbol *symbols, size_t count) -+{ -+ size_t i; -+ -+ for (i = 0; i < count; ++i) -+ vkd3d_free((void *)symbols[i].name); -+ vkd3d_free(symbols); -+} -+ -+static void sm6_block_destroy(struct sm6_block *block) -+{ -+ vkd3d_free(block->instructions); -+ vkd3d_free(block); -+} -+ -+static void sm6_functions_cleanup(struct sm6_function *functions, size_t count) -+{ -+ size_t i, j; -+ -+ for (i = 0; i < count; ++i) -+ { -+ for (j = 0; j < functions[i].block_count; ++j) -+ sm6_block_destroy(functions[i].blocks[j]); -+ } -+ vkd3d_free(functions); -+} -+ -+static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) -+{ -+ struct sm6_parser *sm6 = sm6_parser(parser); -+ -+ dxil_block_destroy(&sm6->root_block); -+ dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); -+ shader_instruction_array_destroy(&parser->instructions); -+ sm6_type_table_cleanup(sm6->types, sm6->type_count); -+ sm6_symtab_cleanup(sm6->global_symbols, sm6->global_symbol_count); -+ sm6_functions_cleanup(sm6->functions, 
sm6->function_count); -+ vkd3d_free(sm6->values); -+ free_shader_desc(&parser->shader_desc); -+ vkd3d_free(sm6); -+} -+ -+static const struct vkd3d_shader_parser_ops sm6_parser_ops = -+{ -+ .parser_destroy = sm6_parser_destroy, -+}; -+ -+static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, -+ const char *source_name, struct vkd3d_shader_message_context *message_context) -+{ -+ const struct shader_signature *output_signature = &sm6->p.shader_desc.output_signature; -+ const struct vkd3d_shader_location location = {.source_name = source_name}; -+ uint32_t version_token, dxil_version, token_count, magic; -+ unsigned int chunk_offset, chunk_size; -+ size_t count, length, function_count; -+ enum bitcode_block_abbreviation abbr; -+ struct vkd3d_shader_version version; -+ struct dxil_block *block; -+ enum vkd3d_result ret; -+ unsigned int i; -+ -+ count = byte_code_size / sizeof(*byte_code); -+ if (count < 6) -+ { -+ WARN("Invalid data size %zu.\n", byte_code_size); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE, -+ "DXIL chunk size %zu is smaller than the DXIL header size.", byte_code_size); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ version_token = byte_code[0]; -+ TRACE("Compiler version: 0x%08x.\n", version_token); -+ token_count = byte_code[1]; -+ TRACE("Token count: %u.\n", token_count); -+ -+ if (token_count < 6 || count < token_count) -+ { -+ WARN("Invalid token count %u (word count %zu).\n", token_count, count); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, -+ "DXIL chunk token count %#x is invalid (word count %zu).", token_count, count); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (byte_code[2] != TAG_DXIL) -+ WARN("Unknown magic number 0x%08x.\n", byte_code[2]); -+ -+ dxil_version = byte_code[3]; -+ if (dxil_version > 0x102) -+ WARN("Unknown DXIL version: 0x%08x.\n", dxil_version); -+ else -+ 
TRACE("DXIL version: 0x%08x.\n", dxil_version); -+ -+ chunk_offset = byte_code[4]; -+ if (chunk_offset < 16 || chunk_offset >= byte_code_size) -+ { -+ WARN("Invalid bitcode chunk offset %#x (data size %zu).\n", chunk_offset, byte_code_size); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET, -+ "DXIL bitcode chunk has invalid offset %#x (data size %#zx).", chunk_offset, byte_code_size); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ chunk_size = byte_code[5]; -+ if (chunk_size > byte_code_size - chunk_offset) -+ { -+ WARN("Invalid bitcode chunk size %#x (data size %zu, chunk offset %#x).\n", -+ chunk_size, byte_code_size, chunk_offset); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, -+ "DXIL bitcode chunk has invalid size %#x (data size %#zx, chunk offset %#x).", -+ chunk_size, byte_code_size, chunk_offset); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ sm6->start = (const uint32_t *)((const char*)&byte_code[2] + chunk_offset); -+ if ((magic = sm6->start[0]) != BITCODE_MAGIC) -+ { -+ WARN("Unknown magic number 0x%08x.\n", magic); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER, -+ "DXIL bitcode chunk magic number 0x%08x is not the expected 0x%08x.", magic, BITCODE_MAGIC); -+ } -+ -+ sm6->end = &sm6->start[(chunk_size + sizeof(*sm6->start) - 1) / sizeof(*sm6->start)]; -+ -+ if ((version.type = version_token >> 16) >= VKD3D_SHADER_TYPE_COUNT) -+ { -+ FIXME("Unknown shader type %#x.\n", version.type); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE, -+ "Unknown shader type %#x.", version.type); -+ } -+ -+ version.major = VKD3D_SM6_VERSION_MAJOR(version_token); -+ version.minor = VKD3D_SM6_VERSION_MINOR(version_token); -+ -+ if ((abbr = sm6->start[1] & 3) != ENTER_SUBBLOCK) -+ { -+ WARN("Initial block abbreviation %u is not ENTER_SUBBLOCK.\n", abbr); -+ vkd3d_shader_error(message_context, &location, 
VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, -+ "DXIL bitcode chunk has invalid initial block abbreviation %u.", abbr); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ /* Estimate instruction count to avoid reallocation in most shaders. */ -+ count = max(token_count, 400) - 400; -+ vkd3d_shader_parser_init(&sm6->p, message_context, source_name, &version, &sm6_parser_ops, -+ (count + (count >> 2)) / 2u + 10); -+ sm6->ptr = &sm6->start[1]; -+ sm6->bitpos = 2; -+ -+ block = &sm6->root_block; -+ if ((ret = dxil_block_init(block, NULL, sm6)) < 0) -+ { -+ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory parsing DXIL bitcode chunk."); -+ else if (ret == VKD3D_ERROR_INVALID_SHADER) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, -+ "DXIL bitcode chunk has invalid bitcode."); -+ else -+ vkd3d_unreachable(); -+ return ret; -+ } -+ -+ dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); -+ sm6->abbrevs = NULL; -+ sm6->abbrev_count = 0; -+ -+ length = sm6->ptr - sm6->start - block->start; -+ if (length != block->length) -+ { -+ WARN("Invalid block length %zu; expected %u.\n", length, block->length); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH, -+ "Root block ends with length %zu but indicated length is %u.", length, block->length); -+ } -+ if (sm6->ptr != sm6->end) -+ { -+ size_t expected_length = sm6->end - sm6->start; -+ length = sm6->ptr - sm6->start; -+ WARN("Invalid module length %zu; expected %zu.\n", length, expected_length); -+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH, -+ "Module ends with length %zu but indicated length is %zu.", length, expected_length); -+ } -+ -+ if ((ret = sm6_parser_type_table_init(sm6)) < 0) -+ { -+ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) -+ vkd3d_shader_error(message_context, &location, 
VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory parsing DXIL type table."); -+ else if (ret == VKD3D_ERROR_INVALID_SHADER) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE, -+ "DXIL type table is invalid."); -+ else -+ vkd3d_unreachable(); -+ return ret; -+ } -+ -+ if ((ret = sm6_parser_symtab_init(sm6)) < 0) -+ { -+ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory parsing DXIL value symbol table."); -+ else if (ret == VKD3D_ERROR_INVALID_SHADER) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB, -+ "DXIL value symbol table is invalid."); -+ else -+ vkd3d_unreachable(); -+ return ret; -+ } -+ -+ if (!(sm6->output_params = shader_parser_get_dst_params(&sm6->p, output_signature->element_count))) -+ { -+ ERR("Failed to allocate output parameters.\n"); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory allocating output parameters."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ function_count = dxil_block_compute_function_count(&sm6->root_block); -+ if (!(sm6->functions = vkd3d_calloc(function_count, sizeof(*sm6->functions)))) -+ { -+ ERR("Failed to allocate function array.\n"); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory allocating DXIL function array."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if (sm6_parser_compute_max_value_count(sm6, &sm6->root_block, 0) == SIZE_MAX) -+ { -+ WARN("Value array count overflowed.\n"); -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "Overflow occurred in the DXIL module value count."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ if (!(sm6->values = vkd3d_calloc(sm6->value_capacity, sizeof(*sm6->values)))) -+ { -+ ERR("Failed to allocate value array.\n"); -+ 
vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory allocating DXIL value array."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ if ((ret = sm6_parser_globals_init(sm6)) < 0) -+ { -+ WARN("Failed to load global declarations.\n"); -+ return ret; -+ } -+ -+ sm6_parser_init_output_signature(sm6, output_signature); -+ -+ if ((ret = sm6_parser_module_init(sm6, &sm6->root_block, 0)) < 0) -+ { -+ if (ret == VKD3D_ERROR_OUT_OF_MEMORY) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory parsing DXIL module."); -+ else if (ret == VKD3D_ERROR_INVALID_SHADER) -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE, -+ "DXIL module is invalid."); -+ return ret; -+ } -+ -+ if (!sm6_parser_require_space(sm6, output_signature->element_count)) -+ { -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory emitting shader signature declarations."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ sm6_parser_emit_output_signature(sm6, output_signature); -+ -+ for (i = 0; i < sm6->function_count; ++i) -+ { -+ if (!sm6_block_emit_instructions(sm6->functions[i].blocks[0], sm6)) -+ { -+ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -+ "Out of memory emitting shader instructions."); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ } -+ -+ dxil_block_destroy(&sm6->root_block); -+ -+ return VKD3D_OK; -+} -+ -+int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) -+{ -+ struct vkd3d_shader_desc *shader_desc; -+ uint32_t *byte_code = NULL; -+ struct sm6_parser *sm6; -+ int ret; -+ -+ ERR("Creating a DXIL parser. 
This is unsupported; you get to keep all the pieces if it breaks.\n"); -+ -+ if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6)))) -+ { -+ ERR("Failed to allocate parser.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ shader_desc = &sm6->p.shader_desc; -+ shader_desc->is_dxil = true; -+ if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, -+ shader_desc)) < 0) -+ { -+ WARN("Failed to extract shader, vkd3d result %d.\n", ret); -+ vkd3d_free(sm6); -+ return ret; -+ } -+ -+ sm6->p.shader_desc = *shader_desc; -+ shader_desc = &sm6->p.shader_desc; -+ -+ if (((uintptr_t)shader_desc->byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) -+ { -+ /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC -+ * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ -+ if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) -+ ERR("Failed to allocate aligned chunk. Unaligned access will occur.\n"); -+ else -+ memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); -+ } -+ -+ ret = sm6_parser_init(sm6, byte_code ? byte_code : shader_desc->byte_code, shader_desc->byte_code_size, -+ compile_info->source_name, message_context); -+ vkd3d_free(byte_code); -+ -+ if (ret < 0) -+ { -+ WARN("Failed to initialise shader parser.\n"); -+ sm6_parser_destroy(&sm6->p); -+ return ret; -+ } -+ -+ *parser = &sm6->p; -+ -+ return ret; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 64d6e87065b..5fe9047bf25 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -72,6 +72,27 @@ void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, c - ctx->result = VKD3D_ERROR_NOT_IMPLEMENTED; - } - -+char *hlsl_sprintf_alloc(struct hlsl_ctx *ctx, const char *fmt, ...) 
-+{ -+ struct vkd3d_string_buffer *string; -+ va_list args; -+ char *ret; -+ -+ if (!(string = hlsl_get_string_buffer(ctx))) -+ return NULL; -+ va_start(args, fmt); -+ if (vkd3d_string_buffer_vprintf(string, fmt, args) < 0) -+ { -+ va_end(args); -+ hlsl_release_string_buffer(ctx, string); -+ return NULL; -+ } -+ va_end(args); -+ ret = hlsl_strdup(ctx, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return ret; -+} -+ - bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var) - { - struct hlsl_scope *scope = ctx->cur_scope; -@@ -112,8 +133,12 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) - - void hlsl_free_var(struct hlsl_ir_var *decl) - { -+ unsigned int k; -+ - vkd3d_free((void *)decl->name); - hlsl_cleanup_semantic(&decl->semantic); -+ for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) -+ vkd3d_free((void *)decl->objects_usage[k]); - vkd3d_free(decl); - } - -@@ -126,7 +151,7 @@ bool hlsl_type_is_row_major(const struct hlsl_type *type) - - unsigned int hlsl_type_minor_size(const struct hlsl_type *type) - { -- if (type->type != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) -+ if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) - return type->dimx; - else - return type->dimy; -@@ -134,7 +159,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type) - - unsigned int hlsl_type_major_size(const struct hlsl_type *type) - { -- if (type->type != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) -+ if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) - return type->dimy; - else - return type->dimx; -@@ -142,7 +167,7 @@ unsigned int hlsl_type_major_size(const struct hlsl_type *type) - - unsigned int hlsl_type_element_count(const struct hlsl_type *type) - { -- switch (type->type) -+ switch (type->class) - { - case HLSL_CLASS_VECTOR: - return type->dimx; -@@ -157,16 +182,26 @@ unsigned int hlsl_type_element_count(const struct hlsl_type *type) - } - } - 
--static unsigned int get_array_size(const struct hlsl_type *type) -+const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type) -+{ -+ if (type->class == HLSL_CLASS_ARRAY) -+ return hlsl_get_multiarray_element_type(type->e.array.type); -+ return type; -+} -+ -+unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type) - { -- if (type->type == HLSL_CLASS_ARRAY) -- return get_array_size(type->e.array.type) * type->e.array.elements_count; -+ if (type->class == HLSL_CLASS_ARRAY) -+ return hlsl_get_multiarray_size(type->e.array.type) * type->e.array.elements_count; - return 1; - } - - bool hlsl_type_is_resource(const struct hlsl_type *type) - { -- if (type->type == HLSL_CLASS_OBJECT) -+ if (type->class == HLSL_CLASS_ARRAY) -+ return hlsl_type_is_resource(type->e.array.type); -+ -+ if (type->class == HLSL_CLASS_OBJECT) - { - switch (type->base_type) - { -@@ -183,10 +218,13 @@ bool hlsl_type_is_resource(const struct hlsl_type *type) - - enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) - { -- if (type->type <= HLSL_CLASS_LAST_NUMERIC) -+ if (type->class <= HLSL_CLASS_LAST_NUMERIC) - return HLSL_REGSET_NUMERIC; - -- if (type->type == HLSL_CLASS_OBJECT) -+ if (type->class == HLSL_CLASS_ARRAY) -+ return hlsl_type_get_regset(type->e.array.type); -+ -+ if (type->class == HLSL_CLASS_OBJECT) - { - switch (type->base_type) - { -@@ -203,8 +241,6 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) - vkd3d_unreachable(); - } - } -- else if (type->type == HLSL_CLASS_ARRAY) -- return hlsl_type_get_regset(type->e.array.type); - - vkd3d_unreachable(); - } -@@ -216,7 +252,8 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int - * (b) the type would cross a vec4 boundary; i.e. a vec3 and a - * vec1 can be packed together, but not a vec3 and a vec2. 
- */ -- if (type->type > HLSL_CLASS_LAST_NUMERIC || (offset & 3) + type->reg_size[HLSL_REGSET_NUMERIC] > 4) -+ if (type->class == HLSL_CLASS_STRUCT || type->class == HLSL_CLASS_ARRAY -+ || (offset & 3) + type->reg_size[HLSL_REGSET_NUMERIC] > 4) - return align(offset, 4); - return offset; - } -@@ -229,7 +266,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - for (k = 0; k <= HLSL_REGSET_LAST; ++k) - type->reg_size[k] = 0; - -- switch (type->type) -+ switch (type->class) - { - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: -@@ -278,7 +315,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - type->reg_size[k] += field->type->reg_size[k]; - } - -- type->dimx += field->type->dimx * field->type->dimy * get_array_size(field->type); -+ type->dimx += field->type->dimx * field->type->dimy * hlsl_get_multiarray_size(field->type); - } - break; - } -@@ -317,7 +354,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e - vkd3d_free(type); - return NULL; - } -- type->type = type_class; -+ type->class = type_class; - type->base_type = base_type; - type->dimx = dimx; - type->dimy = dimy; -@@ -330,7 +367,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e - - static bool type_is_single_component(const struct hlsl_type *type) - { -- return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_OBJECT; -+ return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_OBJECT; - } - - /* Given a type and a component index, this function moves one step through the path required to -@@ -349,7 +386,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - assert(!type_is_single_component(type)); - assert(index < hlsl_type_component_count(type)); - -- switch (type->type) -+ switch (type->class) - { - case HLSL_CLASS_VECTOR: - assert(index < type->dimx); -@@ -414,6 +451,51 @@ struct hlsl_type 
*hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl - return type; - } - -+unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, -+ enum hlsl_regset regset, unsigned int index) -+{ -+ struct hlsl_type *next_type; -+ unsigned int offset = 0; -+ unsigned int idx; -+ -+ while (!type_is_single_component(type)) -+ { -+ next_type = type; -+ idx = traverse_path_from_component_index(ctx, &next_type, &index); -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ if (regset == HLSL_REGSET_NUMERIC) -+ offset += idx; -+ break; -+ -+ case HLSL_CLASS_STRUCT: -+ offset += type->e.record.fields[idx].reg_offset[regset]; -+ break; -+ -+ case HLSL_CLASS_ARRAY: -+ if (regset == HLSL_REGSET_NUMERIC) -+ offset += idx * align(type->e.array.type->reg_size[regset], 4); -+ else -+ offset += idx * type->e.array.type->reg_size[regset]; -+ break; -+ -+ case HLSL_CLASS_OBJECT: -+ assert(idx == 0); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ type = next_type; -+ } -+ -+ return offset; -+} -+ - static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, - unsigned int path_len) - { -@@ -427,7 +509,7 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl - return true; - } - -- if (!(deref->path = hlsl_alloc(ctx, sizeof(*deref->path) * deref->path_len))) -+ if (!(deref->path = hlsl_calloc(ctx, deref->path_len, sizeof(*deref->path)))) - { - deref->var = NULL; - deref->path_len = 0; -@@ -437,13 +519,80 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl - return true; - } - -+bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain) -+{ -+ struct hlsl_ir_index *index; -+ struct hlsl_ir_load *load; -+ unsigned int chain_len, i; -+ struct hlsl_ir_node *ptr; -+ -+ deref->path = NULL; -+ deref->path_len = 0; -+ deref->offset.node = 
NULL; -+ -+ assert(chain); -+ if (chain->type == HLSL_IR_INDEX) -+ assert(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); -+ -+ /* Find the length of the index chain */ -+ chain_len = 0; -+ ptr = chain; -+ while (ptr->type == HLSL_IR_INDEX) -+ { -+ index = hlsl_ir_index(ptr); -+ -+ chain_len++; -+ ptr = index->val.node; -+ } -+ -+ if (ptr->type != HLSL_IR_LOAD) -+ { -+ hlsl_error(ctx, &chain->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Invalid l-value."); -+ return false; -+ } -+ load = hlsl_ir_load(ptr); -+ -+ if (!init_deref(ctx, deref, load->src.var, load->src.path_len + chain_len)) -+ return false; -+ -+ for (i = 0; i < load->src.path_len; ++i) -+ hlsl_src_from_node(&deref->path[i], load->src.path[i].node); -+ -+ chain_len = 0; -+ ptr = chain; -+ while (ptr->type == HLSL_IR_INDEX) -+ { -+ unsigned int p = deref->path_len - 1 - chain_len; -+ -+ index = hlsl_ir_index(ptr); -+ if (hlsl_index_is_noncontiguous(index)) -+ { -+ hlsl_src_from_node(&deref->path[p], deref->path[p + 1].node); -+ hlsl_src_remove(&deref->path[p + 1]); -+ hlsl_src_from_node(&deref->path[p + 1], index->idx.node); -+ } -+ else -+ { -+ hlsl_src_from_node(&deref->path[p], index->idx.node); -+ } -+ -+ chain_len++; -+ ptr = index->val.node; -+ } -+ assert(deref->path_len == load->src.path_len + chain_len); -+ -+ return true; -+} -+ - struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) - { - struct hlsl_type *type; - unsigned int i; - - assert(deref); -- assert(!deref->offset.node); -+ -+ if (deref->offset.node) -+ return deref->data_type; - - type = deref->var->data_type; - for (i = 0; i < deref->path_len; ++i) -@@ -459,9 +608,9 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl - { - unsigned int path_len, path_index, deref_path_len, i; - struct hlsl_type *path_type; -- struct hlsl_ir_constant *c; -+ struct hlsl_ir_node *c; - -- list_init(&block->instrs); -+ hlsl_block_init(block); - - path_len = 0; - path_type = 
hlsl_deref_get_type(ctx, prefix); -@@ -487,12 +636,12 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl - - if (!(c = hlsl_new_uint_constant(ctx, next_index, loc))) - { -- hlsl_free_instr_list(&block->instrs); -+ hlsl_block_cleanup(block); - return false; - } -- list_add_tail(&block->instrs, &c->node.entry); -+ hlsl_block_add_instr(block, c); - -- hlsl_src_from_node(&deref->path[deref_path_len++], &c->node); -+ hlsl_src_from_node(&deref->path[deref_path_len++], c); - } - - assert(deref_path_len == deref->path_len); -@@ -505,7 +654,7 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co - { - assert(idx); - -- switch (type->type) -+ switch (type->class) - { - case HLSL_CLASS_VECTOR: - return hlsl_get_scalar_type(ctx, type->base_type); -@@ -523,8 +672,8 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co - { - struct hlsl_ir_constant *c = hlsl_ir_constant(idx); - -- assert(c->value[0].u < type->e.record.field_count); -- return type->e.record.fields[c->value[0].u].type; -+ assert(c->value.u[0].u < type->e.record.field_count); -+ return type->e.record.fields[c->value.u[0].u].type; - } - - default: -@@ -539,12 +688,13 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba - if (!(type = hlsl_alloc(ctx, sizeof(*type)))) - return NULL; - -- type->type = HLSL_CLASS_ARRAY; -+ type->class = HLSL_CLASS_ARRAY; - type->modifiers = basic_type->modifiers; - type->e.array.elements_count = array_size; - type->e.array.type = basic_type; - type->dimx = basic_type->dimx; - type->dimy = basic_type->dimy; -+ type->sampler_dim = basic_type->sampler_dim; - hlsl_type_calculate_reg_size(ctx, type); - - list_add_tail(&ctx->types, &type->entry); -@@ -559,7 +709,7 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, - - if (!(type = hlsl_alloc(ctx, sizeof(*type)))) - return NULL; -- type->type = HLSL_CLASS_STRUCT; -+ type->class = 
HLSL_CLASS_STRUCT; - type->base_type = HLSL_TYPE_VOID; - type->name = name; - type->dimy = 1; -@@ -579,7 +729,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ - - if (!(type = hlsl_alloc(ctx, sizeof(*type)))) - return NULL; -- type->type = HLSL_CLASS_OBJECT; -+ type->class = HLSL_CLASS_OBJECT; - type->base_type = HLSL_TYPE_TEXTURE; - type->dimx = 4; - type->dimy = 1; -@@ -597,7 +747,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim - - if (!(type = vkd3d_calloc(1, sizeof(*type)))) - return NULL; -- type->type = HLSL_CLASS_OBJECT; -+ type->class = HLSL_CLASS_OBJECT; - type->base_type = HLSL_TYPE_UAV; - type->dimx = format->dimx; - type->dimy = 1; -@@ -614,6 +764,8 @@ static const char * get_case_insensitive_typename(const char *name) - { - "dword", - "float", -+ "matrix", -+ "vector", - }; - unsigned int i; - -@@ -677,7 +829,7 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha - - unsigned int hlsl_type_component_count(const struct hlsl_type *type) - { -- switch (type->type) -+ switch (type->class) - { - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: -@@ -709,7 +861,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - if (t1 == t2) - return true; - -- if (t1->type != t2->type) -+ if (t1->class != t2->class) - return false; - if (t1->base_type != t2->base_type) - return false; -@@ -729,7 +881,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - return false; - if (t1->dimy != t2->dimy) - return false; -- if (t1->type == HLSL_CLASS_STRUCT) -+ if (t1->class == HLSL_CLASS_STRUCT) - { - size_t i; - -@@ -748,7 +900,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - return false; - } - } -- if (t1->type == HLSL_CLASS_ARRAY) -+ if (t1->class == HLSL_CLASS_ARRAY) - return t1->e.array.elements_count == t2->e.array.elements_count - && 
hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); - -@@ -772,7 +924,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, - return NULL; - } - } -- type->type = old->type; -+ type->class = old->class; - type->base_type = old->base_type; - type->dimx = old->dimx; - type->dimy = old->dimy; -@@ -781,7 +933,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, - type->modifiers |= default_majority; - type->sampler_dim = old->sampler_dim; - type->is_minimum_precision = old->is_minimum_precision; -- switch (old->type) -+ switch (old->class) - { - case HLSL_CLASS_ARRAY: - if (!(type->e.array.type = hlsl_type_clone(ctx, old->e.array.type, default_majority, modifiers))) -@@ -799,7 +951,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, - - type->e.record.field_count = field_count; - -- if (!(type->e.record.fields = hlsl_alloc(ctx, field_count * sizeof(*type->e.record.fields)))) -+ if (!(type->e.record.fields = hlsl_calloc(ctx, field_count, sizeof(*type->e.record.fields)))) - { - vkd3d_free((void *)type->name); - vkd3d_free(type); -@@ -848,40 +1000,58 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type) - return true; - } - --struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, -+struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, - const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *cast; - -- cast = hlsl_new_unary_expr(ctx, HLSL_OP1_CAST, node, *loc); -+ cast = hlsl_new_unary_expr(ctx, HLSL_OP1_CAST, node, loc); - if (cast) - cast->data_type = type; -- return hlsl_ir_expr(cast); -+ return cast; - } - --struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) -+struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) - { - /* Use a cast to the same type as a makeshift 
identity expression. */ - return hlsl_new_cast(ctx, node, node->data_type, &node->loc); - } - - struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, -- const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers, -+ const struct vkd3d_shader_location *loc, const struct hlsl_semantic *semantic, unsigned int modifiers, - const struct hlsl_reg_reservation *reg_reservation) - { - struct hlsl_ir_var *var; -+ unsigned int k; - - if (!(var = hlsl_alloc(ctx, sizeof(*var)))) - return NULL; - - var->name = name; - var->data_type = type; -- var->loc = loc; -+ var->loc = *loc; - if (semantic) - var->semantic = *semantic; - var->storage_modifiers = modifiers; - if (reg_reservation) - var->reg_reservation = *reg_reservation; -+ -+ for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) -+ { -+ unsigned int i, obj_count = type->reg_size[k]; -+ -+ if (obj_count == 0) -+ continue; -+ -+ if (!(var->objects_usage[k] = hlsl_calloc(ctx, obj_count, sizeof(*var->objects_usage[0])))) -+ { -+ for (i = 0; i < k; ++i) -+ vkd3d_free(var->objects_usage[i]); -+ vkd3d_free(var); -+ return NULL; -+ } -+ } -+ - return var; - } - -@@ -890,27 +1060,37 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem - { - struct vkd3d_string_buffer *string; - struct hlsl_ir_var *var; -- static LONG counter; -- const char *name; - - if (!(string = hlsl_get_string_buffer(ctx))) - return NULL; -- vkd3d_string_buffer_printf(string, "<%s-%u>", template, InterlockedIncrement(&counter)); -- if (!(name = hlsl_strdup(ctx, string->buffer))) -- { -- hlsl_release_string_buffer(ctx, string); -- return NULL; -- } -- var = hlsl_new_var(ctx, name, type, *loc, NULL, 0, NULL); -+ vkd3d_string_buffer_printf(string, "<%s-%u>", template, ctx->internal_name_counter++); -+ var = hlsl_new_synthetic_var_named(ctx, string->buffer, type, loc, true); - hlsl_release_string_buffer(ctx, string); -+ return var; -+} -+ -+struct hlsl_ir_var 
*hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, -+ struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope) -+{ -+ struct hlsl_ir_var *var; -+ const char *name_copy; -+ -+ if (!(name_copy = hlsl_strdup(ctx, name))) -+ return NULL; -+ var = hlsl_new_var(ctx, name_copy, type, loc, NULL, 0, NULL); - if (var) -- list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); -+ { -+ if (dummy_scope) -+ list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); -+ else -+ list_add_tail(&ctx->globals->vars, &var->scope_entry); -+ } - return var; - } - - static bool type_is_single_reg(const struct hlsl_type *type) - { -- return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR; -+ return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_VECTOR; - } - - bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other) -@@ -964,7 +1144,7 @@ static void init_node(struct hlsl_ir_node *node, enum hlsl_ir_node_type type, - list_init(&node->uses); - } - --struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs) -+struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs) - { - struct hlsl_deref lhs_deref; - -@@ -972,7 +1152,7 @@ struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir - return hlsl_new_store_index(ctx, &lhs_deref, NULL, rhs, 0, &rhs->loc); - } - --struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, -+struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, - struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_store *store; -@@ -1001,35 +1181,35 @@ struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hl - writemask = (1 << 
rhs->data_type->dimx) - 1; - store->writemask = writemask; - -- return store; -+ return &store->node; - } - --struct hlsl_ir_store *hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, -+bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs) - { - struct hlsl_block comp_path_block; - struct hlsl_ir_store *store; - -- list_init(&block->instrs); -+ hlsl_block_init(block); - - if (!(store = hlsl_alloc(ctx, sizeof(*store)))) -- return NULL; -+ return false; - init_node(&store->node, HLSL_IR_STORE, NULL, &rhs->loc); - - if (!init_deref_from_component_index(ctx, &comp_path_block, &store->lhs, lhs, comp, &rhs->loc)) - { - vkd3d_free(store); -- return NULL; -+ return false; - } -- list_move_tail(&block->instrs, &comp_path_block.instrs); -+ hlsl_block_add_block(block, &comp_path_block); - hlsl_src_from_node(&store->rhs, rhs); - - if (type_is_single_reg(rhs->data_type)) - store->writemask = (1 << rhs->data_type->dimx) - 1; - -- list_add_tail(&block->instrs, &store->node.entry); -+ hlsl_block_add_instr(block, &store->node); - -- return store; -+ return true; - } - - struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, -@@ -1045,66 +1225,54 @@ struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function - return &call->node; - } - --struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, -- const struct vkd3d_shader_location *loc) -+struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, -+ const struct hlsl_constant_value *value, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_constant *c; - -- assert(type->type <= HLSL_CLASS_VECTOR); -+ assert(type->class <= HLSL_CLASS_VECTOR); - - if (!(c = hlsl_alloc(ctx, sizeof(*c)))) - return NULL; - - init_node(&c->node, HLSL_IR_CONSTANT, type, loc); -+ c->value = *value; - -- return 
c; -+ return &c->node; - } - --struct hlsl_ir_constant *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc) -+struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_constant *c; -+ struct hlsl_constant_value value; - -- if ((c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), loc))) -- c->value[0].u = b ? ~0u : 0; -- -- return c; -+ value.u[0].u = b ? ~0u : 0; -+ return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &value, loc); - } - --struct hlsl_ir_constant *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, -+struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, - const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_constant *c; -+ struct hlsl_constant_value value; - -- if ((c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) -- c->value[0].f = f; -- -- return c; -+ value.u[0].f = f; -+ return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), &value, loc); - } - --struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n, -- const struct vkd3d_shader_location *loc) -+struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_constant *c; -- -- c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc); -+ struct hlsl_constant_value value; - -- if (c) -- c->value[0].i = n; -- -- return c; -+ value.u[0].i = n; -+ return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &value, loc); - } - --struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, -+struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, - const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_constant *c; -- -- c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, 
HLSL_TYPE_UINT), loc); -- -- if (c) -- c->value[0].u = n; -+ struct hlsl_constant_value value; - -- return c; -+ value.u[0].u = n; -+ return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &value, loc); - } - - struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, -@@ -1124,11 +1292,11 @@ struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op - } - - struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, -- struct hlsl_ir_node *arg, struct vkd3d_shader_location loc) -+ struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg}; - -- return hlsl_new_expr(ctx, op, operands, arg->data_type, &loc); -+ return hlsl_new_expr(ctx, op, operands, arg->data_type, loc); - } - - struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, -@@ -1140,17 +1308,21 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp - return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); - } - --struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc) -+struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, -+ struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_if *iff; - - if (!(iff = hlsl_alloc(ctx, sizeof(*iff)))) - return NULL; -- init_node(&iff->node, HLSL_IR_IF, NULL, &loc); -+ init_node(&iff->node, HLSL_IR_IF, NULL, loc); - hlsl_src_from_node(&iff->condition, condition); -- list_init(&iff->then_instrs.instrs); -- list_init(&iff->else_instrs.instrs); -- return iff; -+ hlsl_block_init(&iff->then_block); -+ hlsl_block_add_block(&iff->then_block, then_block); -+ hlsl_block_init(&iff->else_block); -+ if (else_block) -+ hlsl_block_add_block(&iff->else_block, else_block); -+ return 
&iff->node; - } - - struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, -@@ -1183,23 +1355,36 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl - return load; - } - -+struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, -+ const struct vkd3d_shader_location *loc) -+{ -+ /* This deref can only exists temporarily because it is not the real owner of its members. */ -+ struct hlsl_deref tmp_deref; -+ -+ assert(deref->path_len >= 1); -+ -+ tmp_deref = *deref; -+ tmp_deref.path_len = deref->path_len - 1; -+ return hlsl_new_load_index(ctx, &tmp_deref, NULL, loc); -+} -+ - struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, -- struct vkd3d_shader_location loc) -+ const struct vkd3d_shader_location *loc) - { - struct hlsl_deref var_deref; - - hlsl_init_simple_deref_from_var(&var_deref, var); -- return hlsl_new_load_index(ctx, &var_deref, NULL, &loc); -+ return hlsl_new_load_index(ctx, &var_deref, NULL, loc); - } - --struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, -+struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc) - { - struct hlsl_type *type, *comp_type; - struct hlsl_block comp_path_block; - struct hlsl_ir_load *load; - -- list_init(&block->instrs); -+ hlsl_block_init(block); - - if (!(load = hlsl_alloc(ctx, sizeof(*load)))) - return NULL; -@@ -1213,14 +1398,14 @@ struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_b - vkd3d_free(load); - return NULL; - } -- list_move_tail(&block->instrs, &comp_path_block.instrs); -+ hlsl_block_add_block(block, &comp_path_block); - -- list_add_tail(&block->instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - -- return load; -+ return &load->node; - } - --struct 
hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, -+struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, - const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_resource_load *load; -@@ -1229,24 +1414,37 @@ struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, - return NULL; - init_node(&load->node, HLSL_IR_RESOURCE_LOAD, params->format, loc); - load->load_type = params->type; -- if (!hlsl_copy_deref(ctx, &load->resource, ¶ms->resource)) -+ -+ if (!hlsl_init_deref_from_index_chain(ctx, &load->resource, params->resource)) - { - vkd3d_free(load); - return NULL; - } -- if (!hlsl_copy_deref(ctx, &load->sampler, ¶ms->sampler)) -+ -+ if (params->sampler) - { -- hlsl_cleanup_deref(&load->resource); -- vkd3d_free(load); -- return NULL; -+ if (!hlsl_init_deref_from_index_chain(ctx, &load->sampler, params->sampler)) -+ { -+ hlsl_cleanup_deref(&load->resource); -+ vkd3d_free(load); -+ return NULL; -+ } - } -+ - hlsl_src_from_node(&load->coords, params->coords); -+ hlsl_src_from_node(&load->sample_index, params->sample_index); - hlsl_src_from_node(&load->texel_offset, params->texel_offset); - hlsl_src_from_node(&load->lod, params->lod); -- return load; -+ hlsl_src_from_node(&load->ddx, params->ddx); -+ hlsl_src_from_node(&load->ddy, params->ddy); -+ hlsl_src_from_node(&load->cmp, params->cmp); -+ load->sampling_dim = params->sampling_dim; -+ if (load->sampling_dim == HLSL_SAMPLER_DIM_GENERIC) -+ load->sampling_dim = hlsl_deref_get_type(ctx, &load->resource)->sampler_dim; -+ return &load->node; - } - --struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, -+struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, - struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_resource_store *store; -@@ -1257,10 
+1455,10 @@ struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, con - hlsl_copy_deref(ctx, &store->resource, resource); - hlsl_src_from_node(&store->coords, coords); - hlsl_src_from_node(&store->value, value); -- return store; -+ return &store->node; - } - --struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, -+struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, - struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_swizzle *swizzle; -@@ -1275,29 +1473,67 @@ struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned - init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); - hlsl_src_from_node(&swizzle->val, val); - swizzle->swizzle = s; -- return swizzle; -+ return &swizzle->node; -+} -+ -+bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) -+{ -+ struct hlsl_type *type = index->val.node->data_type; -+ -+ return type->class == HLSL_CLASS_MATRIX && !hlsl_type_is_row_major(type); -+} -+ -+bool hlsl_index_is_resource_access(struct hlsl_ir_index *index) -+{ -+ return index->val.node->data_type->class == HLSL_CLASS_OBJECT; -+} -+ -+struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, -+ struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_type *type = val->data_type; -+ struct hlsl_ir_index *index; -+ -+ if (!(index = hlsl_alloc(ctx, sizeof(*index)))) -+ return NULL; -+ -+ if (type->class == HLSL_CLASS_OBJECT) -+ type = type->e.resource_format; -+ else if (type->class == HLSL_CLASS_MATRIX) -+ type = hlsl_get_vector_type(ctx, type->base_type, type->dimx); -+ else -+ type = hlsl_get_element_type_from_path_index(ctx, type, idx); -+ -+ init_node(&index->node, HLSL_IR_INDEX, type, loc); -+ hlsl_src_from_node(&index->val, val); -+ hlsl_src_from_node(&index->idx, idx); -+ return &index->node; - } - --struct hlsl_ir_jump 
*hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc) -+struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, -+ struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_jump *jump; - - if (!(jump = hlsl_alloc(ctx, sizeof(*jump)))) - return NULL; -- init_node(&jump->node, HLSL_IR_JUMP, NULL, &loc); -+ init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); - jump->type = type; -- return jump; -+ hlsl_src_from_node(&jump->condition, condition); -+ return &jump->node; - } - --struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc) -+struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -+ struct hlsl_block *block, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_loop *loop; - - if (!(loop = hlsl_alloc(ctx, sizeof(*loop)))) - return NULL; -- init_node(&loop->node, HLSL_IR_LOOP, NULL, &loc); -- list_init(&loop->body.instrs); -- return loop; -+ init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); -+ hlsl_block_init(&loop->body); -+ hlsl_block_add_block(&loop->body, block); -+ return &loop->node; - } - - struct clone_instr_map -@@ -1319,20 +1555,22 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, - const struct hlsl_ir_node *src; - struct hlsl_ir_node *dst; - -+ hlsl_block_init(dst_block); -+ - LIST_FOR_EACH_ENTRY(src, &src_block->instrs, struct hlsl_ir_node, entry) - { - if (!(dst = clone_instr(ctx, map, src))) - { -- hlsl_free_instr_list(&dst_block->instrs); -+ hlsl_block_cleanup(dst_block); - return false; - } -- list_add_tail(&dst_block->instrs, &dst->entry); -+ hlsl_block_add_instr(dst_block, dst); - - if (!list_empty(&src->uses)) - { - if (!vkd3d_array_reserve((void **)&map->instrs, &map->capacity, map->count + 1, sizeof(*map->instrs))) - { -- hlsl_free_instr_list(&dst_block->instrs); -+ hlsl_block_cleanup(dst_block); - return false; - } - -@@ -1390,12 +1628,7 @@ static struct 
hlsl_ir_node *clone_call(struct hlsl_ctx *ctx, struct hlsl_ir_call - - static struct hlsl_ir_node *clone_constant(struct hlsl_ctx *ctx, struct hlsl_ir_constant *src) - { -- struct hlsl_ir_constant *dst; -- -- if (!(dst = hlsl_new_constant(ctx, src->node.data_type, &src->node.loc))) -- return NULL; -- memcpy(dst->value, src->value, sizeof(src->value)); -- return &dst->node; -+ return hlsl_new_constant(ctx, src->node.data_type, &src->value, &src->node.loc); - } - - static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_expr *src) -@@ -1411,27 +1644,30 @@ static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_ - - static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_if *src) - { -- struct hlsl_ir_if *dst; -+ struct hlsl_block then_block, else_block; -+ struct hlsl_ir_node *dst; - -- if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), src->node.loc))) -+ if (!clone_block(ctx, &then_block, &src->then_block, map)) - return NULL; -+ if (!clone_block(ctx, &else_block, &src->else_block, map)) -+ { -+ hlsl_block_cleanup(&then_block); -+ return NULL; -+ } - -- if (!clone_block(ctx, &dst->then_instrs, &src->then_instrs, map) -- || !clone_block(ctx, &dst->else_instrs, &src->else_instrs, map)) -+ if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), &then_block, &else_block, &src->node.loc))) - { -- hlsl_free_instr(&dst->node); -+ hlsl_block_cleanup(&then_block); -+ hlsl_block_cleanup(&else_block); - return NULL; - } -- return &dst->node; -+ -+ return dst; - } - --static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) -+static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_jump *src) - { -- struct hlsl_ir_jump *dst; -- -- if (!(dst = hlsl_new_jump(ctx, src->type, src->node.loc))) -- return NULL; -- return &dst->node; -+ return hlsl_new_jump(ctx, 
src->type, map_instr(map, src->condition.node), &src->node.loc); - } - - static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) -@@ -1452,16 +1688,18 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ - - static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) - { -- struct hlsl_ir_loop *dst; -+ struct hlsl_ir_node *dst; -+ struct hlsl_block body; - -- if (!(dst = hlsl_new_loop(ctx, src->node.loc))) -+ if (!clone_block(ctx, &body, &src->body, map)) - return NULL; -- if (!clone_block(ctx, &dst->body, &src->body, map)) -+ -+ if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) - { -- hlsl_free_instr(&dst->node); -+ hlsl_block_cleanup(&body); - return NULL; - } -- return &dst->node; -+ return dst; - } - - static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, -@@ -1486,7 +1724,12 @@ static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, - } - clone_src(map, &dst->coords, &src->coords); - clone_src(map, &dst->lod, &src->lod); -+ clone_src(map, &dst->ddx, &src->ddx); -+ clone_src(map, &dst->ddy, &src->ddy); -+ clone_src(map, &dst->sample_index, &src->sample_index); -+ clone_src(map, &dst->cmp, &src->cmp); - clone_src(map, &dst->texel_offset, &src->texel_offset); -+ dst->sampling_dim = src->sampling_dim; - return &dst->node; - } - -@@ -1529,12 +1772,19 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr - static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, - struct clone_instr_map *map, struct hlsl_ir_swizzle *src) - { -- struct hlsl_ir_swizzle *dst; -+ return hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, -+ map_instr(map, src->val.node), &src->node.loc); -+} - -- if (!(dst = hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, -- map_instr(map, src->val.node), &src->node.loc))) -+static struct hlsl_ir_node 
*clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map, -+ struct hlsl_ir_index *src) -+{ -+ struct hlsl_ir_node *dst; -+ -+ if (!(dst = hlsl_new_index(ctx, map_instr(map, src->val.node), map_instr(map, src->idx.node), -+ &src->node.loc))) - return NULL; -- return &dst->node; -+ return dst; - } - - static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, -@@ -1554,8 +1804,11 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, - case HLSL_IR_IF: - return clone_if(ctx, map, hlsl_ir_if(instr)); - -+ case HLSL_IR_INDEX: -+ return clone_index(ctx, map, hlsl_ir_index(instr)); -+ - case HLSL_IR_JUMP: -- return clone_jump(ctx, hlsl_ir_jump(instr)); -+ return clone_jump(ctx, map, hlsl_ir_jump(instr)); - - case HLSL_IR_LOAD: - return clone_load(ctx, map, hlsl_ir_load(instr)); -@@ -1593,13 +1846,12 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, - struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, - const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc) - { -+ struct hlsl_ir_node *constant, *store; - struct hlsl_ir_function_decl *decl; -- struct hlsl_ir_constant *constant; -- struct hlsl_ir_store *store; - - if (!(decl = hlsl_alloc(ctx, sizeof(*decl)))) - return NULL; -- list_init(&decl->body.instrs); -+ hlsl_block_init(&decl->body); - decl->return_type = return_type; - decl->parameters = *parameters; - decl->loc = *loc; -@@ -1620,17 +1872,17 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, - - if (!(constant = hlsl_new_bool_constant(ctx, false, loc))) - return decl; -- list_add_tail(&decl->body.instrs, &constant->node.entry); -+ hlsl_block_add_instr(&decl->body, constant); - -- if (!(store = hlsl_new_simple_store(ctx, decl->early_return_var, &constant->node))) -+ if (!(store = hlsl_new_simple_store(ctx, decl->early_return_var, constant))) - return decl; -- list_add_tail(&decl->body.instrs, &store->node.entry); -+ hlsl_block_add_instr(&decl->body, 
store); - - return decl; - } - - struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, -- const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc) -+ const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) - { - struct hlsl_buffer *buffer; - -@@ -1640,7 +1892,7 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type - buffer->name = name; - if (reservation) - buffer->reservation = *reservation; -- buffer->loc = loc; -+ buffer->loc = *loc; - list_add_tail(&ctx->buffers, &buffer->entry); - return buffer; - } -@@ -1698,10 +1950,10 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls - { - int r; - -- if ((r = vkd3d_u32_compare(t1->type, t2->type))) -+ if ((r = vkd3d_u32_compare(t1->class, t2->class))) - { -- if (!((t1->type == HLSL_CLASS_SCALAR && t2->type == HLSL_CLASS_VECTOR) -- || (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_SCALAR))) -+ if (!((t1->class == HLSL_CLASS_SCALAR && t2->class == HLSL_CLASS_VECTOR) -+ || (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_SCALAR))) - return r; - } - if ((r = vkd3d_u32_compare(t1->base_type, t2->base_type))) -@@ -1718,7 +1970,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls - return r; - if ((r = vkd3d_u32_compare(t1->dimy, t2->dimy))) - return r; -- if (t1->type == HLSL_CLASS_STRUCT) -+ if (t1->class == HLSL_CLASS_STRUCT) - { - size_t i; - -@@ -1738,7 +1990,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls - } - return 0; - } -- if (t1->type == HLSL_CLASS_ARRAY) -+ if (t1->class == HLSL_CLASS_ARRAY) - { - if ((r = vkd3d_u32_compare(t1->e.array.elements_count, t2->e.array.elements_count))) - return r; -@@ -1768,7 +2020,7 @@ static int compare_function_decl_rb(const void *key, const struct rb_entry *entr - - struct vkd3d_string_buffer *hlsl_type_to_string(struct 
hlsl_ctx *ctx, const struct hlsl_type *type) - { -- struct vkd3d_string_buffer *string; -+ struct vkd3d_string_buffer *string, *inner_string; - - static const char *const base_types[] = - { -@@ -1789,7 +2041,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - return string; - } - -- switch (type->type) -+ switch (type->class) - { - case HLSL_CLASS_SCALAR: - assert(type->base_type < ARRAY_SIZE(base_types)); -@@ -1808,10 +2060,9 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - - case HLSL_CLASS_ARRAY: - { -- struct vkd3d_string_buffer *inner_string; - const struct hlsl_type *t; - -- for (t = type; t->type == HLSL_CLASS_ARRAY; t = t->e.array.type) -+ for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) - ; - - if ((inner_string = hlsl_type_to_string(ctx, t))) -@@ -1820,7 +2071,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - hlsl_release_string_buffer(ctx, inner_string); - } - -- for (t = type; t->type == HLSL_CLASS_ARRAY; t = t->e.array.type) -+ for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) - { - if (t->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) - vkd3d_string_buffer_printf(string, "[]"); -@@ -1860,13 +2111,26 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - - assert(type->sampler_dim < ARRAY_SIZE(dimensions)); - assert(type->e.resource_format->base_type < ARRAY_SIZE(base_types)); -- vkd3d_string_buffer_printf(string, "Texture%s<%s%u>", dimensions[type->sampler_dim], -- base_types[type->e.resource_format->base_type], type->e.resource_format->dimx); -+ vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); -+ if ((inner_string = hlsl_type_to_string(ctx, type->e.resource_format))) -+ { -+ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -+ hlsl_release_string_buffer(ctx, inner_string); -+ } - return string; - - case HLSL_TYPE_UAV: 
-- vkd3d_string_buffer_printf(string, "RWTexture%s<%s%u>", dimensions[type->sampler_dim], -- base_types[type->e.resource_format->base_type], type->e.resource_format->dimx); -+ if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) -+ vkd3d_string_buffer_printf(string, "RWBuffer"); -+ else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -+ vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); -+ else -+ vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); -+ if ((inner_string = hlsl_type_to_string(ctx, type->e.resource_format))) -+ { -+ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -+ hlsl_release_string_buffer(ctx, inner_string); -+ } - return string; - - default: -@@ -1881,6 +2145,31 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - } - } - -+struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, -+ unsigned int index) -+{ -+ struct hlsl_type *type = var->data_type, *current_type; -+ struct vkd3d_string_buffer *buffer; -+ unsigned int element_index; -+ -+ if (!(buffer = hlsl_get_string_buffer(ctx))) -+ return NULL; -+ -+ vkd3d_string_buffer_printf(buffer, "%s", var->name); -+ -+ while (!type_is_single_component(type)) -+ { -+ current_type = type; -+ element_index = traverse_path_from_component_index(ctx, &type, &index); -+ if (current_type->class == HLSL_CLASS_STRUCT) -+ vkd3d_string_buffer_printf(buffer, ".%s", current_type->e.record.fields[element_index].name); -+ else -+ vkd3d_string_buffer_printf(buffer, "[%u]", element_index); -+ } -+ -+ return buffer; -+} -+ - const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) - { - struct vkd3d_string_buffer *string; -@@ -1939,17 +2228,18 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) - { - static const char * const names[] = - { -- "HLSL_IR_CALL", -- "HLSL_IR_CONSTANT", -- "HLSL_IR_EXPR", -- "HLSL_IR_IF", -- "HLSL_IR_LOAD", -- 
"HLSL_IR_LOOP", -- "HLSL_IR_JUMP", -- "HLSL_IR_RESOURCE_LOAD", -- "HLSL_IR_RESOURCE_STORE", -- "HLSL_IR_STORE", -- "HLSL_IR_SWIZZLE", -+ [HLSL_IR_CALL ] = "HLSL_IR_CALL", -+ [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", -+ [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", -+ [HLSL_IR_IF ] = "HLSL_IR_IF", -+ [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", -+ [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", -+ [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", -+ [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", -+ [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", -+ [HLSL_IR_RESOURCE_STORE] = "HLSL_IR_RESOURCE_STORE", -+ [HLSL_IR_STORE ] = "HLSL_IR_STORE", -+ [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", - }; - - if (type >= ARRAY_SIZE(names)) -@@ -1961,10 +2251,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) - { - static const char * const names[] = - { -- "HLSL_IR_JUMP_BREAK", -- "HLSL_IR_JUMP_CONTINUE", -- "HLSL_IR_JUMP_DISCARD", -- "HLSL_IR_JUMP_RETURN", -+ [HLSL_IR_JUMP_BREAK] = "HLSL_IR_JUMP_BREAK", -+ [HLSL_IR_JUMP_CONTINUE] = "HLSL_IR_JUMP_CONTINUE", -+ [HLSL_IR_JUMP_DISCARD_NEG] = "HLSL_IR_JUMP_DISCARD_NEG", -+ [HLSL_IR_JUMP_DISCARD_NZ] = "HLSL_IR_JUMP_DISCARD_NZ", -+ [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", - }; - - assert(type < ARRAY_SIZE(names)); -@@ -1973,11 +2264,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) - - static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr); - --static void dump_instr_list(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct list *list) -+static void dump_block(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_block *block) - { - struct hlsl_ir_node *instr; - -- LIST_FOR_EACH_ENTRY(instr, list, struct hlsl_ir_node, entry) -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - dump_instr(ctx, buffer, instr); - vkd3d_string_buffer_printf(buffer, "\n"); -@@ -2107,7 +2398,7 @@ static void dump_ir_constant(struct 
vkd3d_string_buffer *buffer, const struct hl - vkd3d_string_buffer_printf(buffer, "{"); - for (x = 0; x < type->dimx; ++x) - { -- const union hlsl_constant_value *value = &constant->value[x]; -+ const union hlsl_constant_value_component *value = &constant->value.u[x]; - - switch (type->base_type) - { -@@ -2152,7 +2443,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP1_COS] = "cos", - [HLSL_OP1_COS_REDUCED] = "cos_reduced", - [HLSL_OP1_DSX] = "dsx", -+ [HLSL_OP1_DSX_COARSE] = "dsx_coarse", -+ [HLSL_OP1_DSX_FINE] = "dsx_fine", - [HLSL_OP1_DSY] = "dsy", -+ [HLSL_OP1_DSY_COARSE] = "dsy_coarse", -+ [HLSL_OP1_DSY_FINE] = "dsy_fine", - [HLSL_OP1_EXP2] = "exp2", - [HLSL_OP1_FRACT] = "fract", - [HLSL_OP1_LOG2] = "log2", -@@ -2168,6 +2463,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP1_SIN] = "sin", - [HLSL_OP1_SIN_REDUCED] = "sin_reduced", - [HLSL_OP1_SQRT] = "sqrt", -+ [HLSL_OP1_TRUNC] = "trunc", - - [HLSL_OP2_ADD] = "+", - [HLSL_OP2_BIT_AND] = "&", -@@ -2190,7 +2486,8 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP2_RSHIFT] = ">>", - - [HLSL_OP3_DP2ADD] = "dp2add", -- [HLSL_OP3_LERP] = "lerp", -+ [HLSL_OP3_MOVC] = "movc", -+ [HLSL_OP3_TERNARY] = "ternary", - }; - - return op_names[op]; -@@ -2214,9 +2511,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - vkd3d_string_buffer_printf(buffer, "if ("); - dump_src(buffer, &if_node->condition); - vkd3d_string_buffer_printf(buffer, ") {\n"); -- dump_instr_list(ctx, buffer, &if_node->then_instrs.instrs); -+ dump_block(ctx, buffer, &if_node->then_block); - vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); -- dump_instr_list(ctx, buffer, &if_node->else_instrs.instrs); -+ dump_block(ctx, buffer, &if_node->else_block); - vkd3d_string_buffer_printf(buffer, " %10s }", ""); - } - -@@ -2232,8 +2529,12 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i - vkd3d_string_buffer_printf(buffer, 
"continue"); - break; - -- case HLSL_IR_JUMP_DISCARD: -- vkd3d_string_buffer_printf(buffer, "discard"); -+ case HLSL_IR_JUMP_DISCARD_NEG: -+ vkd3d_string_buffer_printf(buffer, "discard_neg"); -+ break; -+ -+ case HLSL_IR_JUMP_DISCARD_NZ: -+ vkd3d_string_buffer_printf(buffer, "discard_nz"); - break; - - case HLSL_IR_JUMP_RETURN: -@@ -2245,7 +2546,7 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i - static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop) - { - vkd3d_string_buffer_printf(buffer, "for (;;) {\n"); -- dump_instr_list(ctx, buffer, &loop->body.instrs); -+ dump_block(ctx, buffer, &loop->body); - vkd3d_string_buffer_printf(buffer, " %10s }", ""); - } - -@@ -2255,11 +2556,17 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru - { - [HLSL_RESOURCE_LOAD] = "load_resource", - [HLSL_RESOURCE_SAMPLE] = "sample", -+ [HLSL_RESOURCE_SAMPLE_CMP] = "sample_cmp", -+ [HLSL_RESOURCE_SAMPLE_CMP_LZ] = "sample_cmp_lz", - [HLSL_RESOURCE_SAMPLE_LOD] = "sample_lod", -+ [HLSL_RESOURCE_SAMPLE_LOD_BIAS] = "sample_biased", -+ [HLSL_RESOURCE_SAMPLE_GRAD] = "sample_grad", - [HLSL_RESOURCE_GATHER_RED] = "gather_red", - [HLSL_RESOURCE_GATHER_GREEN] = "gather_green", - [HLSL_RESOURCE_GATHER_BLUE] = "gather_blue", - [HLSL_RESOURCE_GATHER_ALPHA] = "gather_alpha", -+ [HLSL_RESOURCE_SAMPLE_INFO] = "sample_info", -+ [HLSL_RESOURCE_RESINFO] = "resinfo", - }; - - assert(load->load_type < ARRAY_SIZE(type_names)); -@@ -2267,8 +2574,16 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru - dump_deref(buffer, &load->resource); - vkd3d_string_buffer_printf(buffer, ", sampler = "); - dump_deref(buffer, &load->sampler); -- vkd3d_string_buffer_printf(buffer, ", coords = "); -- dump_src(buffer, &load->coords); -+ if (load->coords.node) -+ { -+ vkd3d_string_buffer_printf(buffer, ", coords = "); -+ dump_src(buffer, &load->coords); -+ } -+ if 
(load->sample_index.node) -+ { -+ vkd3d_string_buffer_printf(buffer, ", sample index = "); -+ dump_src(buffer, &load->sample_index); -+ } - if (load->texel_offset.node) - { - vkd3d_string_buffer_printf(buffer, ", offset = "); -@@ -2279,6 +2594,21 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru - vkd3d_string_buffer_printf(buffer, ", lod = "); - dump_src(buffer, &load->lod); - } -+ if (load->ddx.node) -+ { -+ vkd3d_string_buffer_printf(buffer, ", ddx = "); -+ dump_src(buffer, &load->ddx); -+ } -+ if (load->ddy.node) -+ { -+ vkd3d_string_buffer_printf(buffer, ", ddy = "); -+ dump_src(buffer, &load->ddy); -+ } -+ if (load->cmp.node) -+ { -+ vkd3d_string_buffer_printf(buffer, ", cmp = "); -+ dump_src(buffer, &load->cmp); -+ } - vkd3d_string_buffer_printf(buffer, ")"); - } - -@@ -2321,6 +2651,14 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls - } - } - -+static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_index *index) -+{ -+ dump_src(buffer, &index->val); -+ vkd3d_string_buffer_printf(buffer, "[idx:"); -+ dump_src(buffer, &index->idx); -+ vkd3d_string_buffer_printf(buffer, "]"); -+} -+ - static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr) - { - if (instr->index) -@@ -2348,6 +2686,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - dump_ir_if(ctx, buffer, hlsl_ir_if(instr)); - break; - -+ case HLSL_IR_INDEX: -+ dump_ir_index(buffer, hlsl_ir_index(instr)); -+ break; -+ - case HLSL_IR_JUMP: - dump_ir_jump(buffer, hlsl_ir_jump(instr)); - break; -@@ -2392,7 +2734,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl - vkd3d_string_buffer_printf(&buffer, "\n"); - } - if (func->has_body) -- dump_instr_list(ctx, &buffer, &func->body.instrs); -+ dump_block(ctx, &buffer, &func->body); - - vkd3d_string_buffer_trace(&buffer); - 
vkd3d_string_buffer_cleanup(&buffer); -@@ -2421,7 +2763,7 @@ void hlsl_free_type(struct hlsl_type *type) - size_t i; - - vkd3d_free((void *)type->name); -- if (type->type == HLSL_CLASS_STRUCT) -+ if (type->class == HLSL_CLASS_STRUCT) - { - for (i = 0; i < type->e.record.field_count; ++i) - { -@@ -2447,6 +2789,11 @@ void hlsl_free_instr_list(struct list *list) - hlsl_free_instr(node); - } - -+void hlsl_block_cleanup(struct hlsl_block *block) -+{ -+ hlsl_free_instr_list(&block->instrs); -+} -+ - static void free_ir_call(struct hlsl_ir_call *call) - { - vkd3d_free(call); -@@ -2468,14 +2815,15 @@ static void free_ir_expr(struct hlsl_ir_expr *expr) - - static void free_ir_if(struct hlsl_ir_if *if_node) - { -- hlsl_free_instr_list(&if_node->then_instrs.instrs); -- hlsl_free_instr_list(&if_node->else_instrs.instrs); -+ hlsl_block_cleanup(&if_node->then_block); -+ hlsl_block_cleanup(&if_node->else_block); - hlsl_src_remove(&if_node->condition); - vkd3d_free(if_node); - } - - static void free_ir_jump(struct hlsl_ir_jump *jump) - { -+ hlsl_src_remove(&jump->condition); - vkd3d_free(jump); - } - -@@ -2487,7 +2835,7 @@ static void free_ir_load(struct hlsl_ir_load *load) - - static void free_ir_loop(struct hlsl_ir_loop *loop) - { -- hlsl_free_instr_list(&loop->body.instrs); -+ hlsl_block_cleanup(&loop->body); - vkd3d_free(loop); - } - -@@ -2497,7 +2845,11 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) - hlsl_cleanup_deref(&load->resource); - hlsl_src_remove(&load->coords); - hlsl_src_remove(&load->lod); -+ hlsl_src_remove(&load->ddx); -+ hlsl_src_remove(&load->ddy); -+ hlsl_src_remove(&load->cmp); - hlsl_src_remove(&load->texel_offset); -+ hlsl_src_remove(&load->sample_index); - vkd3d_free(load); - } - -@@ -2522,6 +2874,13 @@ static void free_ir_swizzle(struct hlsl_ir_swizzle *swizzle) - vkd3d_free(swizzle); - } - -+static void free_ir_index(struct hlsl_ir_index *index) -+{ -+ hlsl_src_remove(&index->val); -+ hlsl_src_remove(&index->idx); -+ 
vkd3d_free(index); -+} -+ - void hlsl_free_instr(struct hlsl_ir_node *node) - { - assert(list_empty(&node->uses)); -@@ -2544,6 +2903,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) - free_ir_if(hlsl_ir_if(node)); - break; - -+ case HLSL_IR_INDEX: -+ free_ir_index(hlsl_ir_index(node)); -+ break; -+ - case HLSL_IR_JUMP: - free_ir_jump(hlsl_ir_jump(node)); - break; -@@ -2580,7 +2943,7 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) - - for (i = 0; i < attr->args_count; ++i) - hlsl_src_remove(&attr->args[i]); -- hlsl_free_instr_list(&attr->instrs); -+ hlsl_block_cleanup(&attr->instrs); - vkd3d_free((void *)attr->name); - vkd3d_free(attr); - } -@@ -2600,7 +2963,7 @@ static void free_function_decl(struct hlsl_ir_function_decl *decl) - vkd3d_free((void *)decl->attrs); - - vkd3d_free(decl->parameters.vars); -- hlsl_free_instr_list(&decl->body.instrs); -+ hlsl_block_cleanup(&decl->body); - vkd3d_free(decl); - } - -@@ -2626,6 +2989,16 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function - struct hlsl_ir_function *func; - struct rb_entry *func_entry; - -+ if (ctx->internal_func_name) -+ { -+ char *internal_name; -+ -+ if (!(internal_name = hlsl_strdup(ctx, ctx->internal_func_name))) -+ return; -+ vkd3d_free(name); -+ name = internal_name; -+ } -+ - func_entry = rb_get(&ctx->functions, name); - if (func_entry) - { -@@ -2826,11 +3199,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - - static const char *const sampler_names[] = - { -- [HLSL_SAMPLER_DIM_GENERIC] = "sampler", -- [HLSL_SAMPLER_DIM_1D] = "sampler1D", -- [HLSL_SAMPLER_DIM_2D] = "sampler2D", -- [HLSL_SAMPLER_DIM_3D] = "sampler3D", -- [HLSL_SAMPLER_DIM_CUBE] = "samplerCUBE", -+ [HLSL_SAMPLER_DIM_GENERIC] = "sampler", -+ [HLSL_SAMPLER_DIM_COMPARISON] = "SamplerComparisonState", -+ [HLSL_SAMPLER_DIM_1D] = "sampler1D", -+ [HLSL_SAMPLER_DIM_2D] = "sampler2D", -+ [HLSL_SAMPLER_DIM_3D] = "sampler3D", -+ [HLSL_SAMPLER_DIM_CUBE] = "samplerCUBE", - }; - - static 
const struct -@@ -2844,8 +3218,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - { - {"dword", HLSL_CLASS_SCALAR, HLSL_TYPE_UINT, 1, 1}, - {"float", HLSL_CLASS_SCALAR, HLSL_TYPE_FLOAT, 1, 1}, -- {"VECTOR", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, -- {"MATRIX", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, -+ {"vector", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, -+ {"matrix", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, - {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, - {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, - {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, -@@ -2884,8 +3258,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - - for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) - { -- unsigned int n_variants = 0; - const char *const *variants; -+ unsigned int n_variants; - - switch (bt) - { -@@ -2905,6 +3279,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - break; - - default: -+ n_variants = 0; -+ variants = NULL; - break; - } - -@@ -2956,9 +3332,11 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - } - } - --static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, -+static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info *compile_info, - const struct hlsl_profile_info *profile, struct vkd3d_shader_message_context *message_context) - { -+ unsigned int i; -+ - memset(ctx, 0, sizeof(*ctx)); - - ctx->profile = profile; -@@ -2967,7 +3345,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, - - if (!(ctx->source_files = hlsl_alloc(ctx, sizeof(*ctx->source_files)))) - return false; -- if (!(ctx->source_files[0] = hlsl_strdup(ctx, source_name ? source_name : ""))) -+ if (!(ctx->source_files[0] = hlsl_strdup(ctx, compile_info->source_name ? 
compile_info->source_name : ""))) - { - vkd3d_free(ctx->source_files); - return false; -@@ -2993,19 +3371,32 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, - - rb_init(&ctx->functions, compare_function_rb); - -- list_init(&ctx->static_initializers); -+ hlsl_block_init(&ctx->static_initializers); - list_init(&ctx->extern_vars); - - list_init(&ctx->buffers); - - if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, -- hlsl_strdup(ctx, "$Globals"), NULL, ctx->location))) -+ hlsl_strdup(ctx, "$Globals"), NULL, &ctx->location))) - return false; - if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, -- hlsl_strdup(ctx, "$Params"), NULL, ctx->location))) -+ hlsl_strdup(ctx, "$Params"), NULL, &ctx->location))) - return false; - ctx->cur_buffer = ctx->globals_buffer; - -+ for (i = 0; i < compile_info->option_count; ++i) -+ { -+ const struct vkd3d_shader_compile_option *option = &compile_info->options[i]; -+ -+ if (option->name == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER) -+ { -+ if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ROW_MAJOR) -+ ctx->matrix_majority = HLSL_MODIFIER_ROW_MAJOR; -+ else if (option->value == VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_COLUMN_MAJOR) -+ ctx->matrix_majority = HLSL_MODIFIER_COLUMN_MAJOR; -+ } -+ } -+ - return true; - } - -@@ -3017,6 +3408,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - struct hlsl_type *type, *next_type; - unsigned int i; - -+ hlsl_block_cleanup(&ctx->static_initializers); -+ - for (i = 0; i < ctx->source_files_count; ++i) - vkd3d_free((void *)ctx->source_files[i]); - vkd3d_free(ctx->source_files); -@@ -3040,6 +3433,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - vkd3d_free((void *)buffer->name); - vkd3d_free(buffer); - } -+ -+ vkd3d_free(ctx->constant_defs.regs); - } - - int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, -@@ -3081,7 +3476,7 @@ int 
hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d - return VKD3D_ERROR_INVALID_ARGUMENT; - } - -- if (!hlsl_ctx_init(&ctx, compile_info->source_name, profile, message_context)) -+ if (!hlsl_ctx_init(&ctx, compile_info, profile, message_context)) - return VKD3D_ERROR_OUT_OF_MEMORY; - - if ((ret = hlsl_lexer_compile(&ctx, hlsl)) == 2) -@@ -3135,3 +3530,44 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d - hlsl_ctx_cleanup(&ctx); - return ret; - } -+ -+struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl) -+{ -+ const struct hlsl_ir_function_decl *saved_cur_function = ctx->cur_function; -+ struct vkd3d_shader_code code = {.code = hlsl, .size = strlen(hlsl)}; -+ const char *saved_internal_func_name = ctx->internal_func_name; -+ struct vkd3d_string_buffer *internal_name; -+ struct hlsl_ir_function_decl *func; -+ void *saved_scanner = ctx->scanner; -+ int ret; -+ -+ TRACE("name %s, hlsl %s.\n", debugstr_a(name), debugstr_a(hlsl)); -+ -+ /* The actual name of the function is mangled with a unique prefix, both to -+ * allow defining multiple variants of a function with the same name, and to -+ * avoid polluting the user name space. */ -+ -+ if (!(internal_name = hlsl_get_string_buffer(ctx))) -+ return NULL; -+ vkd3d_string_buffer_printf(internal_name, "<%s-%u>", name, ctx->internal_name_counter++); -+ -+ /* Save and restore everything that matters. -+ * Note that saving the scope stack is hard, and shouldn't be necessary. 
*/ -+ -+ ctx->scanner = NULL; -+ ctx->internal_func_name = internal_name->buffer; -+ ctx->cur_function = NULL; -+ ret = hlsl_lexer_compile(ctx, &code); -+ ctx->scanner = saved_scanner; -+ ctx->internal_func_name = saved_internal_func_name; -+ ctx->cur_function = saved_cur_function; -+ if (ret) -+ { -+ ERR("Failed to compile intrinsic, error %u.\n", ret); -+ hlsl_release_string_buffer(ctx, internal_name); -+ return NULL; -+ } -+ func = hlsl_get_func_decl(ctx, internal_name->buffer); -+ hlsl_release_string_buffer(ctx, internal_name); -+ return func; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index ccbf22a5801..2cde5d58eba 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -21,10 +21,12 @@ - #define __VKD3D_SHADER_HLSL_H - - #include "vkd3d_shader_private.h" --#include "wine/rbtree.h" -+#include "rbtree.h" - #include "d3dcommon.h" - #include "d3dx9shader.h" --#include "sm4.h" -+ -+enum vkd3d_sm4_register_type; -+enum vkd3d_sm4_swizzle_type; - - /* The general IR structure is inspired by Mesa GLSL hir, even though the code - * ends up being quite different in practice. 
Anyway, here comes the relevant -@@ -102,18 +104,22 @@ enum hlsl_base_type - - enum hlsl_sampler_dim - { -- HLSL_SAMPLER_DIM_GENERIC, -- HLSL_SAMPLER_DIM_1D, -- HLSL_SAMPLER_DIM_2D, -- HLSL_SAMPLER_DIM_3D, -- HLSL_SAMPLER_DIM_CUBE, -- HLSL_SAMPLER_DIM_LAST_SAMPLER = HLSL_SAMPLER_DIM_CUBE, -- HLSL_SAMPLER_DIM_1DARRAY, -- HLSL_SAMPLER_DIM_2DARRAY, -- HLSL_SAMPLER_DIM_2DMS, -- HLSL_SAMPLER_DIM_2DMSARRAY, -- HLSL_SAMPLER_DIM_CUBEARRAY, -- HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_CUBEARRAY, -+ HLSL_SAMPLER_DIM_GENERIC, -+ HLSL_SAMPLER_DIM_COMPARISON, -+ HLSL_SAMPLER_DIM_1D, -+ HLSL_SAMPLER_DIM_2D, -+ HLSL_SAMPLER_DIM_3D, -+ HLSL_SAMPLER_DIM_CUBE, -+ HLSL_SAMPLER_DIM_LAST_SAMPLER = HLSL_SAMPLER_DIM_CUBE, -+ HLSL_SAMPLER_DIM_1DARRAY, -+ HLSL_SAMPLER_DIM_2DARRAY, -+ HLSL_SAMPLER_DIM_2DMS, -+ HLSL_SAMPLER_DIM_2DMSARRAY, -+ HLSL_SAMPLER_DIM_CUBEARRAY, -+ HLSL_SAMPLER_DIM_LAST_TEXTURE = HLSL_SAMPLER_DIM_CUBEARRAY, -+ HLSL_SAMPLER_DIM_BUFFER, -+ HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, -+ HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, - }; - - enum hlsl_regset -@@ -134,16 +140,17 @@ struct hlsl_type - /* Item entry in hlsl_scope->types. hlsl_type->name is used as key (if not NULL). */ - struct rb_entry scope_entry; - -- enum hlsl_type_class type; -+ enum hlsl_type_class class; - /* If type is <= HLSL_CLASS_LAST_NUMERIC, then base_type is <= HLSL_TYPE_LAST_SCALAR. - * If type is HLSL_CLASS_OBJECT, then base_type is > HLSL_TYPE_LAST_SCALAR. - * Otherwise, base_type is not used. */ - enum hlsl_base_type base_type; - - /* If base_type is HLSL_TYPE_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. -- * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim can have any value of the enum. -- * If base_type is HLSL_TYPE_UAV, them sampler_dim must be one of HLSL_SAMPLER_DIM_1D, -- * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, or HLSL_SAMPLER_DIM_2DARRAY. 
-+ * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_TEXTURE. -+ * If base_type is HLSL_TYPE_UAV, then sampler_dim must be one of HLSL_SAMPLER_DIM_1D, -+ * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, HLSL_SAMPLER_DIM_2DARRAY, -+ * HLSL_SAMPLER_DIM_BUFFER, or HLSL_SAMPLER_DIM_STRUCTURED_BUFFER. - * Otherwise, sampler_dim is not used */ - enum hlsl_sampler_dim sampler_dim; - /* Name, in case the type is a named struct or a typedef. */ -@@ -207,6 +214,16 @@ struct hlsl_semantic - { - const char *name; - uint32_t index; -+ -+ /* If the variable or field that stores this hlsl_semantic has already reported that it is missing. */ -+ bool reported_missing; -+ /* In case the variable or field that stores this semantic has already reported to use a -+ * duplicated output semantic, this value stores the last reported index + 1. Otherwise it is 0. */ -+ uint32_t reported_duplicated_output_next_index; -+ /* In case the variable or field that stores this semantic has already reported to use a -+ * duplicated input semantic with incompatible values, this value stores the last reported -+ * index + 1. Otherwise it is 0. */ -+ uint32_t reported_duplicated_input_incompatible_next_index; - }; - - /* A field within a struct type declaration, used in hlsl_type.e.fields. */ -@@ -228,16 +245,21 @@ struct hlsl_struct_field - size_t name_bytecode_offset; - }; - --/* Information of the register allocated for an instruction node or variable. -+/* Information of the register(s) allocated for an instruction node or variable. - * These values are initialized at the end of hlsl_emit_bytecode(), after the compilation passes, - * just before writing the bytecode. -- * For numeric registers, a writemask can be provided to indicate the reservation of only some of the -- * 4 components. - * The type of register (register class) is implied from its use, so it is not stored in this - * struct. 
*/ - struct hlsl_reg - { -+ /* Index of the first register allocated. */ - uint32_t id; -+ /* Number of registers to be allocated. -+ * Unlike the variable's type's regsize, it is not expressed in register components, but rather -+ * in whole registers, and may depend on which components are used within the shader. */ -+ uint32_t allocation_size; -+ /* For numeric registers, a writemask can be provided to indicate the reservation of only some -+ * of the 4 components. */ - unsigned int writemask; - /* Whether the register has been allocated. */ - bool allocated; -@@ -254,6 +276,7 @@ enum hlsl_ir_node_type - HLSL_IR_CONSTANT, - HLSL_IR_EXPR, - HLSL_IR_IF, -+ HLSL_IR_INDEX, - HLSL_IR_LOAD, - HLSL_IR_LOOP, - HLSL_IR_JUMP, -@@ -314,7 +337,7 @@ struct hlsl_src - struct hlsl_attribute - { - const char *name; -- struct list instrs; -+ struct hlsl_block instrs; - struct vkd3d_shader_location loc; - unsigned int args_count; - struct hlsl_src args[]; -@@ -333,6 +356,7 @@ struct hlsl_attribute - #define HLSL_MODIFIER_COLUMN_MAJOR 0x00000400 - #define HLSL_STORAGE_IN 0x00000800 - #define HLSL_STORAGE_OUT 0x00001000 -+#define HLSL_MODIFIER_INLINE 0x00002000 - - #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ - HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ -@@ -342,12 +366,17 @@ struct hlsl_attribute - - #define HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT 0 - --/* Reservation of a specific register to a variable, field, or buffer, written in the HLSL source -- * using the register(·) syntax */ -+/* Reservation of a register and/or an offset for objects inside constant buffers, to be used as a -+ * starting point of their allocation. They are available through the register(·) and the -+ * packoffset(·) syntaxes, respectivelly. -+ * The costant buffer offset is measured register components. 
*/ - struct hlsl_reg_reservation - { -- char type; -- unsigned int index; -+ char reg_type; -+ unsigned int reg_index; -+ -+ char offset_type; -+ unsigned int offset_index; - }; - - struct hlsl_ir_var -@@ -360,8 +389,7 @@ struct hlsl_ir_var - struct hlsl_buffer *buffer; - /* Bitfield for storage modifiers (type modifiers are stored in data_type->modifiers). */ - unsigned int storage_modifiers; -- /* Optional register to be used as a starting point for the variable allocation, specified -- * by the user via the register(·) syntax. */ -+ /* Optional reservations of registers and/or offsets for variables within constant buffers. */ - struct hlsl_reg_reservation reg_reservation; - - /* Item entry in hlsl_scope.vars. Specifically hlsl_ctx.globals.vars if the variable is global. */ -@@ -384,10 +412,21 @@ struct hlsl_ir_var - * and the buffer_offset instead. */ - struct hlsl_reg regs[HLSL_REGSET_LAST + 1]; - -+ struct -+ { -+ bool used; -+ enum hlsl_sampler_dim sampler_dim; -+ struct vkd3d_shader_location first_sampler_dim_loc; -+ } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; -+ /* Minimum number of binds required to include all object components actually used in the shader. -+ * It may be less than the allocation size, e.g. for texture arrays. */ -+ unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1]; -+ - uint32_t is_input_semantic : 1; - uint32_t is_output_semantic : 1; - uint32_t is_uniform : 1; - uint32_t is_param : 1; -+ uint32_t is_separated_resource : 1; - }; - - /* Sized array of variables representing a function's parameters. 
*/ -@@ -446,8 +485,8 @@ struct hlsl_ir_if - { - struct hlsl_ir_node node; - struct hlsl_src condition; -- struct hlsl_block then_instrs; -- struct hlsl_block else_instrs; -+ struct hlsl_block then_block; -+ struct hlsl_block else_block; - }; - - struct hlsl_ir_loop -@@ -468,7 +507,11 @@ enum hlsl_ir_expr_op - HLSL_OP1_COS, - HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ - HLSL_OP1_DSX, -+ HLSL_OP1_DSX_COARSE, -+ HLSL_OP1_DSX_FINE, - HLSL_OP1_DSY, -+ HLSL_OP1_DSY_COARSE, -+ HLSL_OP1_DSY_FINE, - HLSL_OP1_EXP2, - HLSL_OP1_FLOOR, - HLSL_OP1_FRACT, -@@ -485,6 +528,7 @@ enum hlsl_ir_expr_op - HLSL_OP1_SIN, - HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi] */ - HLSL_OP1_SQRT, -+ HLSL_OP1_TRUNC, - - HLSL_OP2_ADD, - HLSL_OP2_BIT_AND, -@@ -506,8 +550,15 @@ enum hlsl_ir_expr_op - HLSL_OP2_NEQUAL, - HLSL_OP2_RSHIFT, - -+ /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, -+ * then adds c. */ - HLSL_OP3_DP2ADD, -- HLSL_OP3_LERP, -+ /* MOVC(a, b, c) returns c if a is bitwise zero and b otherwise. -+ * TERNARY(a, b, c) returns c if a == 0 and b otherwise. -+ * They differ for floating point numbers, because -+ * -0.0 == 0.0, but it is not bitwise zero. */ -+ HLSL_OP3_MOVC, -+ HLSL_OP3_TERNARY, - }; - - #define HLSL_MAX_OPERANDS 3 -@@ -523,7 +574,8 @@ enum hlsl_ir_jump_type - { - HLSL_IR_JUMP_BREAK, - HLSL_IR_JUMP_CONTINUE, -- HLSL_IR_JUMP_DISCARD, -+ HLSL_IR_JUMP_DISCARD_NEG, -+ HLSL_IR_JUMP_DISCARD_NZ, - HLSL_IR_JUMP_RETURN, - }; - -@@ -531,6 +583,8 @@ struct hlsl_ir_jump - { - struct hlsl_ir_node node; - enum hlsl_ir_jump_type type; -+ /* Argument used for HLSL_IR_JUMP_DISCARD_NZ and HLSL_IR_JUMP_DISCARD_NEG. */ -+ struct hlsl_src condition; - }; - - struct hlsl_ir_swizzle -@@ -540,6 +594,12 @@ struct hlsl_ir_swizzle - DWORD swizzle; - }; - -+struct hlsl_ir_index -+{ -+ struct hlsl_ir_node node; -+ struct hlsl_src val, idx; -+}; -+ - /* Reference to a variable, or a part of it (e.g. a vector within a matrix within a struct). 
*/ - struct hlsl_deref - { -@@ -559,9 +619,11 @@ struct hlsl_deref - * components, within the pertaining regset), from the start of the variable, of the part - * referenced. - * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 -- -- * before writing the bytecode. */ -+ * before writing the bytecode. -+ * Since the type information cannot longer be retrieved from the offset alone, the type is -+ * stored in the data_type field. */ - struct hlsl_src offset; -- enum hlsl_regset offset_regset; -+ struct hlsl_type *data_type; - }; - - struct hlsl_ir_load -@@ -574,11 +636,17 @@ enum hlsl_resource_load_type - { - HLSL_RESOURCE_LOAD, - HLSL_RESOURCE_SAMPLE, -+ HLSL_RESOURCE_SAMPLE_CMP, -+ HLSL_RESOURCE_SAMPLE_CMP_LZ, - HLSL_RESOURCE_SAMPLE_LOD, -+ HLSL_RESOURCE_SAMPLE_LOD_BIAS, -+ HLSL_RESOURCE_SAMPLE_GRAD, - HLSL_RESOURCE_GATHER_RED, - HLSL_RESOURCE_GATHER_GREEN, - HLSL_RESOURCE_GATHER_BLUE, - HLSL_RESOURCE_GATHER_ALPHA, -+ HLSL_RESOURCE_SAMPLE_INFO, -+ HLSL_RESOURCE_RESINFO, - }; - - struct hlsl_ir_resource_load -@@ -586,7 +654,8 @@ struct hlsl_ir_resource_load - struct hlsl_ir_node node; - enum hlsl_resource_load_type load_type; - struct hlsl_deref resource, sampler; -- struct hlsl_src coords, lod, texel_offset; -+ struct hlsl_src coords, lod, ddx, ddy, cmp, sample_index, texel_offset; -+ enum hlsl_sampler_dim sampling_dim; - }; - - struct hlsl_ir_resource_store -@@ -607,13 +676,16 @@ struct hlsl_ir_store - struct hlsl_ir_constant - { - struct hlsl_ir_node node; -- union hlsl_constant_value -+ struct hlsl_constant_value - { -- uint32_t u; -- int32_t i; -- float f; -- double d; -- } value[4]; -+ union hlsl_constant_value_component -+ { -+ uint32_t u; -+ int32_t i; -+ float f; -+ double d; -+ } u[4]; -+ } value; - /* Constant register of type 'c' where the constant value is stored for SM1. 
*/ - struct hlsl_reg reg; - }; -@@ -674,6 +746,9 @@ struct hlsl_buffer - unsigned size, used_size; - /* Register of type 'b' on which the buffer is allocated. */ - struct hlsl_reg reg; -+ -+ bool manually_packed_elements; -+ bool automatically_packed_elements; - }; - - struct hlsl_ctx -@@ -730,6 +805,9 @@ struct hlsl_ctx - /* Pointer to the current function; changes as the parser reads the code. */ - const struct hlsl_ir_function_decl *cur_function; - -+ /* Counter for generating unique internal variable names. */ -+ unsigned int internal_name_counter; -+ - /* Default matrix majority for matrix types. Can be set by a pragma within the HLSL source. */ - unsigned int matrix_majority; - -@@ -744,15 +822,18 @@ struct hlsl_ctx - struct hlsl_type *Void; - } builtin_types; - -- /* List of the instruction nodes for initializing static variables; linked by the -- * hlsl_ir_node.entry fields. */ -- struct list static_initializers; -+ /* List of the instruction nodes for initializing static variables. */ -+ struct hlsl_block static_initializers; - - /* Dynamic array of constant values that appear in the shader, associated to the 'c' registers. - * Only used for SM1 profiles. */ - struct hlsl_constant_defs - { -- struct hlsl_vec4 *values; -+ struct hlsl_constant_register -+ { -+ uint32_t index; -+ struct hlsl_vec4 value; -+ } *regs; - size_t count, size; - } constant_defs; - /* Number of temp. registers required for the shader to run, i.e. the largest temp register -@@ -763,6 +844,12 @@ struct hlsl_ctx - * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ - uint32_t thread_count[3]; - -+ /* In some cases we generate opcodes by parsing an HLSL function and then -+ * invoking it. If not NULL, this field is the name of the function that we -+ * are currently parsing, "mangled" with an internal prefix to avoid -+ * polluting the user namespace. 
*/ -+ const char *internal_func_name; -+ - /* Whether the parser is inside a state block (effects' metadata) inside a variable declaration. */ - uint32_t in_state_block : 1; - /* Whether the numthreads() attribute has been provided in the entry-point function. */ -@@ -780,8 +867,9 @@ struct hlsl_resource_load_params - { - struct hlsl_type *format; - enum hlsl_resource_load_type type; -- struct hlsl_deref resource, sampler; -- struct hlsl_ir_node *coords, *lod, *texel_offset; -+ struct hlsl_ir_node *resource, *sampler; -+ struct hlsl_ir_node *coords, *lod, *ddx, *ddy, *cmp, *sample_index, *texel_offset; -+ enum hlsl_sampler_dim sampling_dim; - }; - - static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) -@@ -850,6 +938,27 @@ static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node - return CONTAINING_RECORD(node, struct hlsl_ir_swizzle, node); - } - -+static inline struct hlsl_ir_index *hlsl_ir_index(const struct hlsl_ir_node *node) -+{ -+ assert(node->type == HLSL_IR_INDEX); -+ return CONTAINING_RECORD(node, struct hlsl_ir_index, node); -+} -+ -+static inline void hlsl_block_init(struct hlsl_block *block) -+{ -+ list_init(&block->instrs); -+} -+ -+static inline void hlsl_block_add_instr(struct hlsl_block *block, struct hlsl_ir_node *instr) -+{ -+ list_add_tail(&block->instrs, &instr->entry); -+} -+ -+static inline void hlsl_block_add_block(struct hlsl_block *block, struct hlsl_block *add) -+{ -+ list_move_tail(&block->instrs, &add->instrs); -+} -+ - static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node) - { - src->node = node; -@@ -873,6 +982,15 @@ static inline void *hlsl_alloc(struct hlsl_ctx *ctx, size_t size) - return ptr; - } - -+static inline void *hlsl_calloc(struct hlsl_ctx *ctx, size_t count, size_t size) -+{ -+ void *ptr = vkd3d_calloc(count, size); -+ -+ if (!ptr) -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return ptr; -+} -+ - static inline void *hlsl_realloc(struct 
hlsl_ctx *ctx, void *ptr, size_t size) - { - void *ret = vkd3d_realloc(ptr, size); -@@ -948,6 +1066,8 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) - switch (dim) - { - case HLSL_SAMPLER_DIM_1D: -+ case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - return 1; - case HLSL_SAMPLER_DIM_1DARRAY: - case HLSL_SAMPLER_DIM_2D: -@@ -965,20 +1085,25 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) - } - } - -+char *hlsl_sprintf_alloc(struct hlsl_ctx *ctx, const char *fmt, ...) VKD3D_PRINTF_FUNC(2, 3); -+ - const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op); - const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type); - const char *debug_hlsl_writemask(unsigned int writemask); - const char *debug_hlsl_swizzle(unsigned int swizzle, unsigned int count); - - struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type); -+struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, -+ unsigned int index); - struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); - const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type); - --struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, -+struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false); - void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); - bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var); - -+void hlsl_block_cleanup(struct hlsl_block *block); - bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); - - void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); -@@ 
-986,6 +1111,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); - -+bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain); - bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); - - void hlsl_cleanup_deref(struct hlsl_deref *deref); -@@ -1012,64 +1138,77 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type); - struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size); - struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, - struct hlsl_ir_node *arg2); --struct hlsl_ir_constant *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); - struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, -- const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc); -+ const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, - const struct vkd3d_shader_location *loc); --struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, -+struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, - const struct vkd3d_shader_location *loc); --struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, -- const struct vkd3d_shader_location *loc); --struct hlsl_ir_expr 
*hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node); -+struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, -+ const struct hlsl_constant_value *value, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node); - struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], - struct hlsl_type *data_type, const struct vkd3d_shader_location *loc); --struct hlsl_ir_constant *hlsl_new_float_constant(struct hlsl_ctx *ctx, -+struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, - float f, const struct vkd3d_shader_location *loc); - struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, - struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, - const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc); --struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc); --struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n, -- const struct vkd3d_shader_location *loc); --struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc); -+struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, -+ struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, -+ enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); - - void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); - - struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, -- 
struct vkd3d_shader_location loc); -+ const struct vkd3d_shader_location *loc); - struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); --struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, -+struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, -+ const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc); - --struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); --struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, -+struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); -+struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, - struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc); --struct hlsl_ir_store *hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, -+bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs); - --struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc); --struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, -+bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); -+bool hlsl_index_is_resource_access(struct hlsl_ir_index *index); -+ -+struct 
hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, -+ struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -+ struct hlsl_block *block, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, - const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); --struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, -+struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, - struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); - struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, - struct hlsl_struct_field *fields, size_t field_count); --struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, -+struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, - struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); - struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, - struct hlsl_type *type, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, -+ struct hlsl_type *type, const struct vkd3d_shader_location *loc, bool dummy_scope); - struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, - unsigned int sample_count); - struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format); --struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, -+struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, - const struct vkd3d_shader_location *loc); - struct hlsl_ir_node 
*hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, -- struct vkd3d_shader_location loc); -+ const struct vkd3d_shader_location *loc); - struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, -- const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers, -+ const struct vkd3d_shader_location *loc, const struct hlsl_semantic *semantic, unsigned int modifiers, - const struct hlsl_reg_reservation *reg_reservation); - - void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, -@@ -1092,6 +1231,8 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type); - unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, enum hlsl_regset regset); - struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl_type *type, - unsigned int index); -+unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, -+ enum hlsl_regset regset, unsigned int index); - bool hlsl_type_is_row_major(const struct hlsl_type *type); - unsigned int hlsl_type_minor_size(const struct hlsl_type *type); - unsigned int hlsl_type_major_size(const struct hlsl_type *type); -@@ -1101,6 +1242,9 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type); - unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); - bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); - -+const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type); -+unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type); -+ - unsigned int hlsl_combine_swizzles(unsigned int first, unsigned int second, unsigned int dim); - unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second); - unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask); -@@ -1109,12 +1253,17 @@ 
unsigned int hlsl_swizzle_from_writemask(unsigned int writemask); - struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); - bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, - unsigned int *start, unsigned int *count); -+bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, -+ enum hlsl_regset regset, unsigned int *index); - bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset); - unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); - struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); - -+bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); - bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); - bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); -+bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), -+ struct hlsl_block *block, void *context); - - bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); -@@ -1124,9 +1273,11 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun - bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, - const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); - bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); -+ bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); - int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl 
*entry_func, struct vkd3d_shader_code *out); - -+struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); -+ - int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); - - #endif -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index adff1da04d8..e9ae3ccf3d3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -37,6 +37,7 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); - %option bison-locations - %option extra-type="struct hlsl_ctx *" - %option never-interactive -+%option nodefault - %option noinput - %option nounput - %option noyywrap -@@ -95,6 +96,7 @@ matrix {return KW_MATRIX; } - namespace {return KW_NAMESPACE; } - nointerpolation {return KW_NOINTERPOLATION; } - out {return KW_OUT; } -+packoffset {return KW_PACKOFFSET; } - pass {return KW_PASS; } - PixelShader {return KW_PIXELSHADER; } - precise {return KW_PRECISE; } -@@ -102,6 +104,8 @@ RasterizerState {return KW_RASTERIZERSTATE; } - RenderTargetView {return KW_RENDERTARGETVIEW; } - return {return KW_RETURN; } - register {return KW_REGISTER; } -+RWBuffer {return KW_RWBUFFER; } -+RWStructuredBuffer {return KW_RWSTRUCTUREDBUFFER; } - RWTexture1D {return KW_RWTEXTURE1D; } - RWTexture2D {return KW_RWTEXTURE2D; } - RWTexture3D {return KW_RWTEXTURE3D; } -@@ -265,6 +269,10 @@ row_major {return KW_ROW_MAJOR; } - return STRING; - } - {WS}+ {} -+{ANY} { -+ FIXME("Malformed preprocessor line directive?\n"); -+ BEGIN(INITIAL); -+ } - {NEWLINE} { - FIXME("Malformed preprocessor line directive?\n"); - BEGIN(INITIAL); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index fd1eaf6ec95..fb6d485ea69 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -53,7 +53,7 @@ struct parse_initializer - { - struct hlsl_ir_node **args; - unsigned int args_count; -- 
struct list *instrs; -+ struct hlsl_block *instrs; - bool braces; - }; - -@@ -73,6 +73,10 @@ struct parse_variable_def - struct hlsl_semantic semantic; - struct hlsl_reg_reservation reg_reservation; - struct parse_initializer initializer; -+ -+ struct hlsl_type *basic_type; -+ unsigned int modifiers; -+ struct vkd3d_shader_location modifiers_loc; - }; - - struct parse_function -@@ -85,8 +89,8 @@ struct parse_function - - struct parse_if_body - { -- struct list *then_instrs; -- struct list *else_instrs; -+ struct hlsl_block *then_block; -+ struct hlsl_block *else_block; - }; - - enum parse_assign_op -@@ -129,9 +133,18 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "%s", s); - } - --static struct hlsl_ir_node *node_from_list(struct list *list) -+static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) -+{ -+ return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry); -+} -+ -+static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) - { -- return LIST_ENTRY(list_tail(list), struct hlsl_ir_node, entry); -+ struct hlsl_block *block; -+ -+ if ((block = hlsl_alloc(ctx, sizeof(*block)))) -+ hlsl_block_init(block); -+ return block; - } - - static struct list *make_empty_list(struct hlsl_ctx *ctx) -@@ -143,10 +156,10 @@ static struct list *make_empty_list(struct hlsl_ctx *ctx) - return list; - } - --static void destroy_instr_list(struct list *list) -+static void destroy_block(struct hlsl_block *block) - { -- hlsl_free_instr_list(list); -- vkd3d_free(list); -+ hlsl_block_cleanup(block); -+ vkd3d_free(block); - } - - static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct hlsl_type *src, -@@ -164,7 +177,7 @@ static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct - src_comp_type = hlsl_type_get_component_type(ctx, src, k); - dst_comp_type = hlsl_type_get_component_type(ctx, dst, k); - -- if 
((src_comp_type->type != HLSL_CLASS_SCALAR || dst_comp_type->type != HLSL_CLASS_SCALAR) -+ if ((src_comp_type->class != HLSL_CLASS_SCALAR || dst_comp_type->class != HLSL_CLASS_SCALAR) - && !hlsl_types_are_equal(src_comp_type, dst_comp_type)) - return false; - } -@@ -196,9 +209,9 @@ static bool type_contains_only_numerics(struct hlsl_type *type) - { - unsigned int i; - -- if (type->type == HLSL_CLASS_ARRAY) -+ if (type->class == HLSL_CLASS_ARRAY) - return type_contains_only_numerics(type->e.array.type); -- if (type->type == HLSL_CLASS_STRUCT) -+ if (type->class == HLSL_CLASS_STRUCT) - { - for (i = 0; i < type->e.record.field_count; ++i) - { -@@ -207,23 +220,23 @@ static bool type_contains_only_numerics(struct hlsl_type *type) - } - return true; - } -- return type->type <= HLSL_CLASS_LAST_NUMERIC; -+ return type->class <= HLSL_CLASS_LAST_NUMERIC; - } - - static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) - { -- if (src->type <= HLSL_CLASS_LAST_NUMERIC && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) -+ if (src->class <= HLSL_CLASS_LAST_NUMERIC && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) - return true; - -- if (src->type == HLSL_CLASS_MATRIX && dst->type == HLSL_CLASS_MATRIX -+ if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX - && src->dimx >= dst->dimx && src->dimy >= dst->dimy) - return true; - -- if ((src->type == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) -+ if ((src->class == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) - && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) - return false; - -- if ((dst->type == HLSL_CLASS_MATRIX && dst->dimy > 1) -+ if ((dst->class == HLSL_CLASS_MATRIX && dst->dimy > 1) - && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) - return false; - -@@ -232,10 +245,10 @@ static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ - - 
static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) - { -- if ((src->type <= HLSL_CLASS_LAST_NUMERIC) != (dst->type <= HLSL_CLASS_LAST_NUMERIC)) -+ if ((src->class <= HLSL_CLASS_LAST_NUMERIC) != (dst->class <= HLSL_CLASS_LAST_NUMERIC)) - return false; - -- if (src->type <= HLSL_CLASS_LAST_NUMERIC) -+ if (src->class <= HLSL_CLASS_LAST_NUMERIC) - { - /* Scalar vars can be converted to any other numeric data type */ - if (src->dimx == 1 && src->dimy == 1) -@@ -244,21 +257,21 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ - if (dst->dimx == 1 && dst->dimy == 1) - return true; - -- if (src->type == HLSL_CLASS_MATRIX || dst->type == HLSL_CLASS_MATRIX) -+ if (src->class == HLSL_CLASS_MATRIX || dst->class == HLSL_CLASS_MATRIX) - { -- if (src->type == HLSL_CLASS_MATRIX && dst->type == HLSL_CLASS_MATRIX) -+ if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX) - return src->dimx >= dst->dimx && src->dimy >= dst->dimy; - - /* Matrix-vector conversion is apparently allowed if they have - * the same components count, or if the matrix is 1xN or Nx1 - * and we are reducing the component count */ -- if (src->type == HLSL_CLASS_VECTOR || dst->type == HLSL_CLASS_VECTOR) -+ if (src->class == HLSL_CLASS_VECTOR || dst->class == HLSL_CLASS_VECTOR) - { - if (hlsl_type_component_count(src) == hlsl_type_component_count(dst)) - return true; - -- if ((src->type == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) && -- (dst->type == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) -+ if ((src->class == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) && -+ (dst->class == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) - return hlsl_type_component_count(src) >= hlsl_type_component_count(dst); - } - -@@ -273,19 +286,16 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ - return hlsl_types_are_componentwise_equal(ctx, src, 
dst); - } - --static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, -- unsigned int comp, const struct vkd3d_shader_location *loc); -- --static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) - { - struct hlsl_type *src_type = node->data_type; -- struct hlsl_ir_expr *cast; -+ struct hlsl_ir_node *cast; - - if (hlsl_types_are_equal(src_type, dst_type)) - return node; - -- if (src_type->type > HLSL_CLASS_VECTOR || dst_type->type > HLSL_CLASS_VECTOR) -+ if (src_type->class > HLSL_CLASS_VECTOR || dst_type->class > HLSL_CLASS_VECTOR) - { - unsigned int src_comp_count = hlsl_type_component_count(src_type); - unsigned int dst_comp_count = hlsl_type_component_count(dst_type); -@@ -295,9 +305,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_var *var; - unsigned int dst_idx; - -- broadcast = src_type->type <= HLSL_CLASS_LAST_NUMERIC && src_type->dimx == 1 && src_type->dimy == 1; -+ broadcast = src_type->class <= HLSL_CLASS_LAST_NUMERIC && src_type->dimx == 1 && src_type->dimy == 1; - matrix_cast = !broadcast && dst_comp_count != src_comp_count -- && src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX; -+ && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; - assert(src_comp_count >= dst_comp_count || broadcast); - if (matrix_cast) - { -@@ -311,9 +321,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - - for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx) - { -+ struct hlsl_ir_node *component_load; - struct hlsl_type *dst_comp_type; -- struct hlsl_ir_store *store; -- struct hlsl_block block; -+ struct hlsl_block store_block; - unsigned int src_idx; - - if 
(broadcast) -@@ -333,21 +343,21 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - - dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); - -- if (!(load = add_load_component(ctx, instrs, node, src_idx, loc))) -+ if (!(component_load = hlsl_add_load_component(ctx, block, node, src_idx, loc))) - return NULL; - -- if (!(cast = hlsl_new_cast(ctx, &load->node, dst_comp_type, loc))) -+ if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) - return NULL; -- list_add_tail(instrs, &cast->node.entry); -+ hlsl_block_add_instr(block, cast); - -- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, &cast->node))) -+ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast)) - return NULL; -- list_move_tail(instrs, &block.instrs); -+ hlsl_block_add_block(block, &store_block); - } - -- if (!(load = hlsl_new_var_load(ctx, var, *loc))) -+ if (!(load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - - return &load->node; - } -@@ -355,12 +365,12 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, - { - if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) - return NULL; -- list_add_tail(instrs, &cast->node.entry); -- return &cast->node; -+ hlsl_block_add_instr(block, cast); -+ return cast; - } - } - --static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) - { - struct hlsl_type *src_type = node->data_type; -@@ -384,19 +394,20 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct - - if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy) - hlsl_warning(ctx, loc, 
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", -- src_type->type == HLSL_CLASS_VECTOR ? "vector" : "matrix"); -+ src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix"); - -- return add_cast(ctx, instrs, node, dst_type, loc); -+ return add_cast(ctx, block, node, dst_type, loc); - } - --static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, const struct vkd3d_shader_location loc) -+static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, -+ const struct vkd3d_shader_location *loc) - { - if (modifiers & mod) - { - struct vkd3d_string_buffer *string; - - if ((string = hlsl_modifiers_to_string(ctx, mod))) -- hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifier '%s' was already specified.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return modifiers; -@@ -404,28 +415,29 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, con - return modifiers | mod; - } - --static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) -+static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) - { -- struct hlsl_ir_node *condition, *not; -- struct hlsl_ir_jump *jump; -- struct hlsl_ir_if *iff; -+ struct hlsl_ir_node *condition, *not, *iff, *jump; -+ struct hlsl_block then_block; - - /* E.g. "for (i = 0; ; ++i)". 
*/ -- if (list_empty(cond_list)) -+ if (list_empty(&cond_block->instrs)) - return true; - -- condition = node_from_list(cond_list); -- if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, condition->loc))) -+ condition = node_from_block(cond_block); -+ if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) - return false; -- list_add_tail(cond_list, ¬->entry); -+ hlsl_block_add_instr(cond_block, not); - -- if (!(iff = hlsl_new_if(ctx, not, condition->loc))) -+ hlsl_block_init(&then_block); -+ -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &condition->loc))) - return false; -- list_add_tail(cond_list, &iff->node.entry); -+ hlsl_block_add_instr(&then_block, jump); - -- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, condition->loc))) -+ if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) - return false; -- list_add_head(&iff->then_instrs.instrs, &jump->node.entry); -+ hlsl_block_add_instr(cond_block, iff); - return true; - } - -@@ -436,50 +448,87 @@ enum loop_type - LOOP_DO_WHILE - }; - --static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, struct list *init, struct list *cond, -- struct list *iter, struct list *body, struct vkd3d_shader_location loc) -+static bool attribute_list_has_duplicates(const struct parse_attribute_list *attrs) - { -- struct list *list = NULL; -- struct hlsl_ir_loop *loop = NULL; -- struct hlsl_ir_if *cond_jump = NULL; -+ unsigned int i, j; - -- if (!(list = make_empty_list(ctx))) -- goto oom; -+ for (i = 0; i < attrs->count; ++i) -+ { -+ for (j = i + 1; j < attrs->count; ++j) -+ { -+ if (!strcmp(attrs->attrs[i]->name, attrs->attrs[j]->name)) -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, -+ const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, -+ struct hlsl_block *iter, struct hlsl_block *body, const 
struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *loop; -+ unsigned int i; -+ -+ if (attribute_list_has_duplicates(attributes)) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); - -- if (init) -- list_move_head(list, init); -+ /* Ignore unroll(0) attribute, and any invalid attribute. */ -+ for (i = 0; i < attributes->count; ++i) -+ { -+ const struct hlsl_attribute *attr = attributes->attrs[i]; -+ if (!strcmp(attr->name, "unroll")) -+ { -+ if (attr->args_count) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); -+ } -+ else -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); -+ } -+ } -+ else if (!strcmp(attr->name, "loop") -+ || !strcmp(attr->name, "fastopt") -+ || !strcmp(attr->name, "allow_uav_condition")) -+ { -+ hlsl_fixme(ctx, loc, "Unhandled attribute '%s'.", attr->name); -+ } -+ else -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, "Unrecognized attribute '%s'.", attr->name); -+ } -+ } - -- if (!(loop = hlsl_new_loop(ctx, loc))) -+ if (!init && !(init = make_empty_block(ctx))) - goto oom; -- list_add_tail(list, &loop->node.entry); - - if (!append_conditional_break(ctx, cond)) - goto oom; - -- if (type != LOOP_DO_WHILE) -- list_move_tail(&loop->body.instrs, cond); -- -- list_move_tail(&loop->body.instrs, body); -- - if (iter) -- list_move_tail(&loop->body.instrs, iter); -+ hlsl_block_add_block(body, iter); - - if (type == LOOP_DO_WHILE) -- list_move_tail(&loop->body.instrs, cond); -+ list_move_tail(&body->instrs, &cond->instrs); -+ else -+ list_move_head(&body->instrs, &cond->instrs); - -- vkd3d_free(init); -- vkd3d_free(cond); -- vkd3d_free(body); -- return list; -+ if (!(loop = hlsl_new_loop(ctx, body, loc))) -+ goto oom; -+ hlsl_block_add_instr(init, loop); -+ -+ destroy_block(cond); -+ destroy_block(body); -+ destroy_block(iter); -+ return init; - - 
oom: -- vkd3d_free(loop); -- vkd3d_free(cond_jump); -- vkd3d_free(list); -- destroy_instr_list(init); -- destroy_instr_list(cond); -- destroy_instr_list(iter); -- destroy_instr_list(body); -+ destroy_block(init); -+ destroy_block(cond); -+ destroy_block(iter); -+ destroy_block(body); - return NULL; - } - -@@ -496,18 +545,18 @@ static unsigned int initializer_size(const struct parse_initializer *initializer - - static void free_parse_initializer(struct parse_initializer *initializer) - { -- destroy_instr_list(initializer->instrs); -+ destroy_block(initializer->instrs); - vkd3d_free(initializer->args); - } - --static struct hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_node *value, const char *swizzle, -+static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_node *value, const char *swizzle, - struct vkd3d_shader_location *loc) - { - unsigned int len = strlen(swizzle), component = 0; - unsigned int i, set, swiz = 0; - bool valid; - -- if (value->data_type->type == HLSL_CLASS_MATRIX) -+ if (value->data_type->class == HLSL_CLASS_MATRIX) - { - /* Matrix swizzle */ - bool m_swizzle; -@@ -582,224 +631,102 @@ static struct hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_ - return NULL; - } - --static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs, -- struct hlsl_ir_node *return_value, struct vkd3d_shader_location loc) -+static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) - { - struct hlsl_type *return_type = ctx->cur_function->return_type; -- struct hlsl_ir_jump *jump; -+ struct hlsl_ir_node *jump; - - if (ctx->cur_function->return_var) - { - if (return_value) - { -- struct hlsl_ir_store *store; -+ struct hlsl_ir_node *store; - -- if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, &loc))) -- return NULL; -+ if (!(return_value = add_implicit_conversion(ctx, 
block, return_value, return_type, loc))) -+ return false; - - if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) -- return NULL; -- list_add_after(&return_value->entry, &store->node.entry); -+ return false; -+ list_add_after(&return_value->entry, &store->entry); - } - else - { -- hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Non-void functions must return a value."); -- return NULL; -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Non-void functions must return a value."); -+ return false; - } - } - else - { - if (return_value) -- hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); - } - -- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) -- return NULL; -- list_add_tail(instrs, &jump->node.entry); -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, NULL, loc))) -+ return false; -+ hlsl_block_add_instr(block, jump); - -- return jump; -+ return true; - } - --static struct hlsl_ir_load *add_load_index(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, -- struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) -+struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) - { -- const struct hlsl_deref *src; -- struct hlsl_ir_load *load; -- -- if (var_instr->type == HLSL_IR_LOAD) -- { -- src = &hlsl_ir_load(var_instr)->src; -- } -- else -- { -- struct hlsl_ir_store *store; -- struct hlsl_ir_var *var; -- -- if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) -- return NULL; -- -- if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) -- return NULL; -- list_add_tail(instrs, &store->node.entry); -- -- src = &store->lhs; -- } -+ 
struct hlsl_ir_node *load, *store; -+ struct hlsl_block load_block; -+ struct hlsl_ir_var *var; -+ struct hlsl_deref src; - -- if (!(load = hlsl_new_load_index(ctx, src, idx, loc))) -+ if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) - return NULL; -- list_add_tail(instrs, &load->node.entry); -- -- return load; --} -- --static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, -- unsigned int comp, const struct vkd3d_shader_location *loc) --{ -- const struct hlsl_deref *src; -- struct hlsl_ir_load *load; -- struct hlsl_block block; -- -- if (var_instr->type == HLSL_IR_LOAD) -- { -- src = &hlsl_ir_load(var_instr)->src; -- } -- else -- { -- struct hlsl_ir_store *store; -- struct hlsl_ir_var *var; -- -- if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) -- return NULL; -- -- if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) -- return NULL; -- list_add_tail(instrs, &store->node.entry); - -- src = &store->lhs; -- } -+ if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) -+ return NULL; -+ hlsl_block_add_instr(block, store); - -- if (!(load = hlsl_new_load_component(ctx, &block, src, comp, loc))) -+ hlsl_init_simple_deref_from_var(&src, var); -+ if (!(load = hlsl_new_load_component(ctx, &load_block, &src, comp, loc))) - return NULL; -- list_move_tail(instrs, &block.instrs); -+ hlsl_block_add_block(block, &load_block); - - return load; - } - --static bool add_record_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, -- unsigned int idx, const struct vkd3d_shader_location loc) -+static bool add_record_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *record, -+ unsigned int idx, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_constant *c; -+ struct hlsl_ir_node *index, *c; - - assert(idx < record->data_type->e.record.field_count); - -- if (!(c = 
hlsl_new_uint_constant(ctx, idx, &loc))) -- return false; -- list_add_tail(instrs, &c->node.entry); -- -- return !!add_load_index(ctx, instrs, record, &c->node, &loc); --} -- --static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, -- enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, -- const struct vkd3d_shader_location *loc); -- --static bool add_matrix_index(struct hlsl_ctx *ctx, struct list *instrs, -- struct hlsl_ir_node *matrix, struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_type *mat_type = matrix->data_type, *ret_type; -- struct hlsl_deref var_deref; -- struct hlsl_ir_load *load; -- struct hlsl_ir_var *var; -- unsigned int i; -- -- if (hlsl_type_is_row_major(mat_type)) -- return add_load_index(ctx, instrs, matrix, index, loc); -- -- ret_type = hlsl_get_vector_type(ctx, mat_type->base_type, mat_type->dimx); -- -- if (!(var = hlsl_new_synthetic_var(ctx, "index", ret_type, loc))) -+ if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) - return false; -- hlsl_init_simple_deref_from_var(&var_deref, var); -- -- for (i = 0; i < mat_type->dimx; ++i) -- { -- struct hlsl_ir_load *column, *value; -- struct hlsl_ir_store *store; -- struct hlsl_ir_constant *c; -- struct hlsl_block block; -- -- if (!(c = hlsl_new_uint_constant(ctx, i, loc))) -- return false; -- list_add_tail(instrs, &c->node.entry); -- -- if (!(column = add_load_index(ctx, instrs, matrix, &c->node, loc))) -- return false; -- -- if (!(value = add_load_index(ctx, instrs, &column->node, index, loc))) -- return false; -+ hlsl_block_add_instr(block, c); - -- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, i, &value->node))) -- return false; -- list_move_tail(instrs, &block.instrs); -- } -- -- if (!(load = hlsl_new_var_load(ctx, var, *loc))) -+ if (!(index = hlsl_new_index(ctx, record, c, loc))) - return false; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, 
index); - - return true; - } - --static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct list *instrs, -- struct hlsl_ir_node *index, unsigned int dim_count, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_load *coords_load; -- struct hlsl_deref coords_deref; -- struct hlsl_ir_constant *zero; -- struct hlsl_ir_store *store; -- struct hlsl_ir_var *coords; -- -- if (!(coords = hlsl_new_synthetic_var(ctx, "coords", -- hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) -- return NULL; -- -- hlsl_init_simple_deref_from_var(&coords_deref, coords); -- if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, index, (1u << dim_count) - 1, loc))) -- return NULL; -- list_add_tail(instrs, &store->node.entry); -- -- if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) -- return NULL; -- list_add_tail(instrs, &zero->node.entry); -- -- if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, &zero->node, 1u << dim_count, loc))) -- return NULL; -- list_add_tail(instrs, &store->node.entry); -- -- if (!(coords_load = hlsl_new_var_load(ctx, coords, *loc))) -- return NULL; -- list_add_tail(instrs, &coords_load->node.entry); -- -- return &coords_load->node; --} -+static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, -+ const struct vkd3d_shader_location *loc); - --static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, -+static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *array, - struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; -- struct hlsl_ir_expr *cast; -+ struct hlsl_ir_node *return_index, *cast; - -- if (expr_type->type == HLSL_CLASS_OBJECT -+ if (expr_type->class == HLSL_CLASS_OBJECT - && 
(expr_type->base_type == HLSL_TYPE_TEXTURE || expr_type->base_type == HLSL_TYPE_UAV) - && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { -- struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; - unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim); -- /* Only HLSL_IR_LOAD can return an object. */ -- struct hlsl_ir_load *object_load = hlsl_ir_load(array); -- struct hlsl_ir_resource_load *resource_load; - -- if (index_type->type > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) -+ if (index_type->class > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) - { - struct vkd3d_string_buffer *string; - -@@ -810,24 +737,18 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls - return false; - } - -- if (!(index = add_implicit_conversion(ctx, instrs, index, -+ if (!(index = add_implicit_conversion(ctx, block, index, - hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) - return false; - -- if (!(index = add_zero_mipmap_level(ctx, instrs, index, dim_count, loc))) -+ if (!(return_index = hlsl_new_index(ctx, array, index, loc))) - return false; -+ hlsl_block_add_instr(block, return_index); - -- load_params.format = expr_type->e.resource_format; -- load_params.resource = object_load->src; -- load_params.coords = index; -- -- if (!(resource_load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- list_add_tail(instrs, &resource_load->node.entry); - return true; - } - -- if (index_type->type != HLSL_CLASS_SCALAR) -+ if (index_type->class != HLSL_CLASS_SCALAR) - { - hlsl_error(ctx, &index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Array index is not scalar."); - return false; -@@ -835,23 +756,21 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls - - if (!(cast = hlsl_new_cast(ctx, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &index->loc))) - return false; -- list_add_tail(instrs, &cast->node.entry); -- index = 
&cast->node; -+ hlsl_block_add_instr(block, cast); -+ index = cast; - -- if (expr_type->type == HLSL_CLASS_MATRIX) -- return add_matrix_index(ctx, instrs, array, index, loc); -- -- if (expr_type->type != HLSL_CLASS_ARRAY && expr_type->type != HLSL_CLASS_VECTOR) -+ if (expr_type->class != HLSL_CLASS_ARRAY && expr_type->class != HLSL_CLASS_VECTOR && expr_type->class != HLSL_CLASS_MATRIX) - { -- if (expr_type->type == HLSL_CLASS_SCALAR) -+ if (expr_type->class == HLSL_CLASS_SCALAR) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, "Scalar expressions cannot be array-indexed."); - else - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, "Expression cannot be array-indexed."); - return false; - } - -- if (!add_load_index(ctx, instrs, array, index, loc)) -+ if (!(return_index = hlsl_new_index(ctx, array, index, loc))) - return false; -+ hlsl_block_add_instr(block, return_index); - - return true; - } -@@ -877,12 +796,12 @@ static struct hlsl_type *apply_type_modifiers(struct hlsl_ctx *ctx, struct hlsl_ - - if (!(*modifiers & HLSL_MODIFIERS_MAJORITY_MASK) - && !(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK) -- && type->type == HLSL_CLASS_MATRIX) -+ && type->class == HLSL_CLASS_MATRIX) - { - if (!(default_majority = ctx->matrix_majority) && force_majority) - default_majority = HLSL_MODIFIER_COLUMN_MAJOR; - } -- else if (type->type != HLSL_CLASS_MATRIX && (*modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) -+ else if (type->class != HLSL_CLASS_MATRIX && (*modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "'row_major' and 'column_major' modifiers are only allowed for matrices."); -@@ -917,13 +836,23 @@ static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) - return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; - } - -+static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -+{ -+ return ctx->profile->major_version > major 
|| (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); -+} -+ -+static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -+{ -+ return !shader_profile_version_ge(ctx, major, minor); -+} -+ - static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - struct hlsl_type *type, unsigned int modifiers, struct list *defs) - { - struct parse_variable_def *v, *v_next; - size_t i = 0; - -- if (type->type == HLSL_CLASS_MATRIX) -+ if (type->class == HLSL_CLASS_MATRIX) - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - - memset(fields, 0, sizeof(*fields)); -@@ -939,7 +868,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - - field->type = type; - -- if (shader_is_sm_5_1(ctx) && type->type == HLSL_CLASS_OBJECT) -+ if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) - { - for (k = 0; k < v->arrays.count; ++k) - unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -@@ -983,6 +912,9 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Illegal initializer on a struct field."); - free_parse_initializer(&v->initializer); - } -+ if (v->reg_reservation.offset_type) -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "packoffset() is not allowed inside struct definitions."); - vkd3d_free(v); - } - vkd3d_free(defs); -@@ -1052,18 +984,23 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, - } - - static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, -- struct parse_parameter *param, const struct vkd3d_shader_location loc) -+ struct parse_parameter *param, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_var *var; - -- if (param->type->type == HLSL_CLASS_MATRIX) -+ if (param->type->class == 
HLSL_CLASS_MATRIX) - assert(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - - if ((param->modifiers & HLSL_STORAGE_OUT) && (param->modifiers & HLSL_STORAGE_UNIFORM)) -- hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Parameter '%s' is declared as both \"out\" and \"uniform\".", param->name); - -- if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, ¶m->semantic, param->modifiers, ¶m->reg_reservation))) -+ if (param->reg_reservation.offset_type) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "packoffset() is not allowed on function parameters."); -+ -+ if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, ¶m->semantic, param->modifiers, -+ ¶m->reg_reservation))) - return false; - var->is_param = 1; - -@@ -1084,12 +1021,61 @@ static struct hlsl_reg_reservation parse_reg_reservation(const char *reg_string) - { - struct hlsl_reg_reservation reservation = {0}; - -- if (!sscanf(reg_string + 1, "%u", &reservation.index)) -+ if (!sscanf(reg_string + 1, "%u", &reservation.reg_index)) - { - FIXME("Unsupported register reservation syntax.\n"); - return reservation; - } -- reservation.type = reg_string[0]; -+ reservation.reg_type = ascii_tolower(reg_string[0]); -+ return reservation; -+} -+ -+static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const char *reg_string, -+ const char *swizzle, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_reg_reservation reservation = {0}; -+ char *endptr; -+ -+ if (shader_profile_version_lt(ctx, 4, 0)) -+ return reservation; -+ -+ reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); -+ if (*endptr) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid packoffset() syntax."); -+ return reservation; -+ } -+ -+ reservation.offset_type = ascii_tolower(reg_string[0]); -+ if (reservation.offset_type != 'c') -+ { -+ hlsl_error(ctx, loc, 
VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Only 'c' registers are allowed in packoffset()."); -+ return reservation; -+ } -+ -+ reservation.offset_index *= 4; -+ -+ if (swizzle) -+ { -+ if (strlen(swizzle) != 1) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid packoffset() component \"%s\".", swizzle); -+ -+ if (swizzle[0] == 'x' || swizzle[0] == 'r') -+ reservation.offset_index += 0; -+ else if (swizzle[0] == 'y' || swizzle[0] == 'g') -+ reservation.offset_index += 1; -+ else if (swizzle[0] == 'z' || swizzle[0] == 'b') -+ reservation.offset_index += 2; -+ else if (swizzle[0] == 'w' || swizzle[0] == 'a') -+ reservation.offset_index += 3; -+ else -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid packoffset() component \"%s\".", swizzle); -+ } -+ - return reservation; - } - -@@ -1109,66 +1095,82 @@ static struct hlsl_ir_function_decl *get_func_decl(struct rb_tree *funcs, - return NULL; - } - --static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) -+static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr) - { -- struct list *list; -+ struct hlsl_block *block; - -- if (!(list = make_empty_list(ctx))) -+ if (!(block = make_empty_block(ctx))) - { -- hlsl_free_instr(node); -+ hlsl_free_instr(instr); - return NULL; - } -- list_add_tail(list, &node->entry); -- return list; -+ hlsl_block_add_instr(block, instr); -+ return block; - } - --static unsigned int evaluate_static_expression(struct hlsl_ir_node *node) -+static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ const struct vkd3d_shader_location *loc) - { -- if (node->data_type->type != HLSL_CLASS_SCALAR) -- return 0; -+ struct hlsl_ir_constant *constant; -+ struct hlsl_ir_node *node; -+ struct hlsl_block expr; -+ unsigned int ret = 0; -+ bool progress; - -- switch (node->type) -+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) 
- { -- case HLSL_IR_CONSTANT: -+ switch (node->type) - { -- struct hlsl_ir_constant *constant = hlsl_ir_constant(node); -- const union hlsl_constant_value *value = &constant->value[0]; -- -- switch (constant->node.data_type->base_type) -- { -- case HLSL_TYPE_UINT: -- return value->u; -- case HLSL_TYPE_INT: -- return value->i; -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- return value->f; -- case HLSL_TYPE_DOUBLE: -- return value->d; -- case HLSL_TYPE_BOOL: -- return !!value->u; -- default: -- vkd3d_unreachable(); -- } -+ case HLSL_IR_CONSTANT: -+ case HLSL_IR_EXPR: -+ case HLSL_IR_SWIZZLE: -+ case HLSL_IR_LOAD: -+ case HLSL_IR_INDEX: -+ continue; -+ case HLSL_IR_CALL: -+ case HLSL_IR_IF: -+ case HLSL_IR_LOOP: -+ case HLSL_IR_JUMP: -+ case HLSL_IR_RESOURCE_LOAD: -+ case HLSL_IR_RESOURCE_STORE: -+ case HLSL_IR_STORE: -+ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Expected literal expression."); - } -+ } - -- case HLSL_IR_EXPR: -- case HLSL_IR_LOAD: -- case HLSL_IR_RESOURCE_LOAD: -- case HLSL_IR_SWIZZLE: -- FIXME("Unhandled type %s.\n", hlsl_node_type_to_string(node->type)); -- return 0; -+ if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) -+ return 0; -+ hlsl_block_add_block(&expr, block); - -- case HLSL_IR_CALL: -- case HLSL_IR_IF: -- case HLSL_IR_JUMP: -- case HLSL_IR_LOOP: -- case HLSL_IR_RESOURCE_STORE: -- case HLSL_IR_STORE: -- vkd3d_unreachable(); -+ if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) -+ { -+ hlsl_block_cleanup(&expr); -+ return 0; -+ } -+ -+ do -+ { -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, &expr); -+ } while (progress); -+ -+ node = node_from_block(&expr); -+ if (node->type == HLSL_IR_CONSTANT) -+ { -+ constant = hlsl_ir_constant(node); -+ ret = constant->value.u[0].u; -+ } -+ else -+ { -+ hlsl_error(ctx, &node->loc, 
VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Failed to evaluate constant expression."); - } - -- vkd3d_unreachable(); -+ hlsl_block_cleanup(&expr); -+ -+ return ret; - } - - static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) -@@ -1180,20 +1182,20 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t - if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) - return true; - -- if (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_VECTOR) -+ if (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_VECTOR) - return true; - -- if (t1->type == HLSL_CLASS_MATRIX || t2->type == HLSL_CLASS_MATRIX) -+ if (t1->class == HLSL_CLASS_MATRIX || t2->class == HLSL_CLASS_MATRIX) - { - /* Matrix-vector conversion is apparently allowed if either they have the same components - count or the matrix is nx1 or 1xn */ -- if (t1->type == HLSL_CLASS_VECTOR || t2->type == HLSL_CLASS_VECTOR) -+ if (t1->class == HLSL_CLASS_VECTOR || t2->class == HLSL_CLASS_VECTOR) - { - if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) - return true; - -- return (t1->type == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) -- || (t2->type == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); -+ return (t1->class == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) -+ || (t2->class == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); - } - - /* Both matrices */ -@@ -1226,7 +1228,7 @@ static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hl - static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct hlsl_type *t2, - const struct vkd3d_shader_location *loc, enum hlsl_type_class *type, unsigned int *dimx, unsigned int *dimy) - { -- if (t1->type > HLSL_CLASS_LAST_NUMERIC) -+ if (t1->class > HLSL_CLASS_LAST_NUMERIC) - { - struct vkd3d_string_buffer *string; - -@@ -1237,7 +1239,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct 
hlsl_type *t1, struct - return false; - } - -- if (t2->type > HLSL_CLASS_LAST_NUMERIC) -+ if (t2->class > HLSL_CLASS_LAST_NUMERIC) - { - struct vkd3d_string_buffer *string; - -@@ -1264,17 +1266,17 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct - - if (t1->dimx == 1 && t1->dimy == 1) - { -- *type = t2->type; -+ *type = t2->class; - *dimx = t2->dimx; - *dimy = t2->dimy; - } - else if (t2->dimx == 1 && t2->dimy == 1) - { -- *type = t1->type; -+ *type = t1->class; - *dimx = t1->dimx; - *dimy = t1->dimy; - } -- else if (t1->type == HLSL_CLASS_MATRIX && t2->type == HLSL_CLASS_MATRIX) -+ else if (t1->class == HLSL_CLASS_MATRIX && t2->class == HLSL_CLASS_MATRIX) - { - *type = HLSL_CLASS_MATRIX; - *dimx = min(t1->dimx, t2->dimx); -@@ -1284,13 +1286,13 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct - { - if (t1->dimx * t1->dimy <= t2->dimx * t2->dimy) - { -- *type = t1->type; -+ *type = t1->class; - *dimx = t1->dimx; - *dimy = t1->dimy; - } - else - { -- *type = t2->type; -+ *type = t2->class; - *dimx = t2->dimx; - *dimy = t2->dimy; - } -@@ -1299,67 +1301,62 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct - return true; - } - --static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], - struct hlsl_type *type, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *expr; - unsigned int i; - -- if (type->type == HLSL_CLASS_MATRIX) -+ if (type->class == HLSL_CLASS_MATRIX) - { -- struct hlsl_type *vector_type; -+ struct hlsl_type *scalar_type; -+ struct hlsl_ir_load *var_load; - struct hlsl_deref var_deref; -- struct hlsl_ir_load *load; -+ struct hlsl_ir_node *load; - struct hlsl_ir_var *var; - -- vector_type = hlsl_get_vector_type(ctx, type->base_type, 
hlsl_type_minor_size(type)); -+ scalar_type = hlsl_get_scalar_type(ctx, type->base_type); - - if (!(var = hlsl_new_synthetic_var(ctx, "split_op", type, loc))) - return NULL; - hlsl_init_simple_deref_from_var(&var_deref, var); - -- for (i = 0; i < hlsl_type_major_size(type); ++i) -+ for (i = 0; i < type->dimy * type->dimx; ++i) - { -- struct hlsl_ir_node *value, *vector_operands[HLSL_MAX_OPERANDS] = { NULL }; -- struct hlsl_ir_store *store; -- struct hlsl_ir_constant *c; -+ struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; -+ struct hlsl_block store_block; - unsigned int j; - -- if (!(c = hlsl_new_uint_constant(ctx, i, loc))) -- return NULL; -- list_add_tail(instrs, &c->node.entry); -- - for (j = 0; j < HLSL_MAX_OPERANDS; j++) - { - if (operands[j]) - { -- struct hlsl_ir_load *load; -- -- if (!(load = add_load_index(ctx, instrs, operands[j], &c->node, loc))) -+ if (!(load = hlsl_add_load_component(ctx, block, operands[j], i, loc))) - return NULL; -- vector_operands[j] = &load->node; -+ -+ cell_operands[j] = load; - } - } - -- if (!(value = add_expr(ctx, instrs, op, vector_operands, vector_type, loc))) -+ if (!(value = add_expr(ctx, block, op, cell_operands, scalar_type, loc))) - return NULL; - -- if (!(store = hlsl_new_store_index(ctx, &var_deref, &c->node, value, 0, loc))) -+ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, value)) - return NULL; -- list_add_tail(instrs, &store->node.entry); -+ hlsl_block_add_block(block, &store_block); - } - -- if (!(load = hlsl_new_var_load(ctx, var, *loc))) -+ if (!(var_load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &var_load->node); - -- return &load->node; -+ return &var_load->node; - } - - if (!(expr = hlsl_new_expr(ctx, op, operands, type, loc))) - return NULL; -- list_add_tail(instrs, &expr->entry); -+ hlsl_block_add_instr(block, expr); - - return expr; - } -@@ -1385,95 +1382,79 @@ static void 
check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * - } - } - --static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg}; - -- return add_expr(ctx, instrs, op, args, arg->data_type, loc); -+ return add_expr(ctx, block, op, args, arg->data_type, loc); - } - --static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) - { - check_integer_type(ctx, arg); - -- return add_unary_arithmetic_expr(ctx, instrs, op, arg, loc); -+ return add_unary_arithmetic_expr(ctx, block, op, arg, loc); - } - --static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_type *bool_type; - -- bool_type = hlsl_get_numeric_type(ctx, arg->data_type->type, HLSL_TYPE_BOOL, -+ bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, - arg->data_type->dimx, arg->data_type->dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg, bool_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, bool_type, loc); -+ return add_expr(ctx, block, op, args, bool_type, loc); - } - --static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx 
*ctx, struct list *instrs, -- enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, -- const struct vkd3d_shader_location *loc) -+static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, -+ const struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) - { -- struct hlsl_type *common_type; - enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); - enum hlsl_type_class type; - unsigned int dimx, dimy; -- struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; - - if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) - return NULL; - -- common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); -+ return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); -+} - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) -- return NULL; -+static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, -+ const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_type *common_type; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) -- return NULL; -+ common_type = get_common_numeric_type(ctx, arg1, arg2, loc); - -- return add_expr(ctx, instrs, op, args, common_type, loc); --} -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) -+ return NULL; - --static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, struct vkd3d_shader_location loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -+ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) -+ return NULL; - -- list_move_tail(list1, list2); -- 
vkd3d_free(list2); -- add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, &loc); -- return list1; -+ return add_expr(ctx, block, op, args, common_type, loc); - } - --static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { - check_integer_type(ctx, arg1); - check_integer_type(ctx, arg2); - -- return add_binary_arithmetic_expr(ctx, instrs, op, arg1, arg2, loc); --} -- --static struct list *add_binary_bitwise_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_bitwise_expr(ctx, list1, op, arg1, arg2, loc); -- -- return list1; -+ return add_binary_arithmetic_expr(ctx, block, op, arg1, arg2, loc); - } - --static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -@@ -1489,27 +1470,16 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str - common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) -+ if (!(args[1] = 
add_implicit_conversion(ctx, block, arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, return_type, loc); --} -- --static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_comparison_expr(ctx, list1, op, arg1, arg2, &loc); -- return list1; -+ return add_expr(ctx, block, op, args, return_type, loc); - } - --static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -@@ -1523,28 +1493,16 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct - - common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, common_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, common_type, loc); --} -- --static struct list *add_binary_logical_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_logical_expr(ctx, list1, op, arg1, arg2, loc); -- -- return list1; -+ return 
add_expr(ctx, block, op, args, common_type, loc); - } - --static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { -@@ -1566,28 +1524,16 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l - return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); - integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy); - -- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, return_type, loc))) -+ if (!(args[0] = add_implicit_conversion(ctx, block, arg1, return_type, loc))) - return NULL; - -- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, integer_type, loc))) -+ if (!(args[1] = add_implicit_conversion(ctx, block, arg2, integer_type, loc))) - return NULL; - -- return add_expr(ctx, instrs, op, args, return_type, loc); --} -- --static struct list *add_binary_shift_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, -- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); -- -- list_move_tail(list1, list2); -- vkd3d_free(list2); -- add_binary_shift_expr(ctx, list1, op, arg1, arg2, loc); -- -- return list1; -+ return add_expr(ctx, block, op, args, return_type, loc); - } - --static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct list *instrs, -+static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) - { - enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); -@@ -1596,31 +1542,29 @@ static struct hlsl_ir_node 
*add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis - enum hlsl_ir_expr_op op; - unsigned dim; - -- if (arg1->data_type->type == HLSL_CLASS_MATRIX) -+ if (arg1->data_type->class == HLSL_CLASS_MATRIX) - { - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, arg1->data_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid type %s.\n", string->buffer); -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return NULL; - } - -- if (arg2->data_type->type == HLSL_CLASS_MATRIX) -+ if (arg2->data_type->class == HLSL_CLASS_MATRIX) - { - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, arg2->data_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid type %s.\n", string->buffer); -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return NULL; - } - -- if (arg1->data_type->type == HLSL_CLASS_SCALAR) -+ if (arg1->data_type->class == HLSL_CLASS_SCALAR) - dim = arg2->data_type->dimx; -- else if (arg2->data_type->type == HLSL_CLASS_SCALAR) -+ else if (arg2->data_type->class == HLSL_CLASS_SCALAR) - dim = arg1->data_type->dimx; - else - dim = min(arg1->data_type->dimx, arg2->data_type->dimx); -@@ -1642,28 +1586,75 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis - return add_expr(ctx, instrs, op, args, ret_type, loc); - } - --static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) -+static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, -+ struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) - { -- static const enum hlsl_ir_expr_op ops[] = -- { -- 0, -- HLSL_OP2_ADD, -- 0, -- HLSL_OP2_MUL, -- HLSL_OP2_DIV, -- HLSL_OP2_MOD, -- HLSL_OP2_LSHIFT, -- 
HLSL_OP2_RSHIFT, -- HLSL_OP2_BIT_AND, -- HLSL_OP2_BIT_OR, -- HLSL_OP2_BIT_XOR, -- }; -+ struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); - -- return ops[op]; --} -+ hlsl_block_add_block(block1, block2); -+ destroy_block(block2); - --static bool invert_swizzle(unsigned int *swizzle, unsigned int *writemask, unsigned int *ret_width) --{ -+ switch (op) -+ { -+ case HLSL_OP2_ADD: -+ case HLSL_OP2_DIV: -+ case HLSL_OP2_MOD: -+ case HLSL_OP2_MUL: -+ add_binary_arithmetic_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ case HLSL_OP2_BIT_AND: -+ case HLSL_OP2_BIT_OR: -+ case HLSL_OP2_BIT_XOR: -+ add_binary_bitwise_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ case HLSL_OP2_LESS: -+ case HLSL_OP2_GEQUAL: -+ case HLSL_OP2_EQUAL: -+ case HLSL_OP2_NEQUAL: -+ add_binary_comparison_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ case HLSL_OP2_LOGIC_AND: -+ case HLSL_OP2_LOGIC_OR: -+ add_binary_logical_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ case HLSL_OP2_LSHIFT: -+ case HLSL_OP2_RSHIFT: -+ add_binary_shift_expr(ctx, block1, op, arg1, arg2, loc); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ return block1; -+} -+ -+static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) -+{ -+ static const enum hlsl_ir_expr_op ops[] = -+ { -+ 0, -+ HLSL_OP2_ADD, -+ 0, -+ HLSL_OP2_MUL, -+ HLSL_OP2_DIV, -+ HLSL_OP2_MOD, -+ HLSL_OP2_LSHIFT, -+ HLSL_OP2_RSHIFT, -+ HLSL_OP2_BIT_AND, -+ HLSL_OP2_BIT_OR, -+ HLSL_OP2_BIT_XOR, -+ }; -+ -+ return ops[op]; -+} -+ -+static bool invert_swizzle(unsigned int *swizzle, unsigned int *writemask, unsigned int *ret_width) -+{ - unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; - - /* Apply the writemask to the swizzle to get a new writemask and swizzle. 
*/ -@@ -1698,16 +1689,16 @@ static bool invert_swizzle(unsigned int *swizzle, unsigned int *writemask, unsig - return true; - } - --static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *lhs, -+static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, - enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) - { - struct hlsl_type *lhs_type = lhs->data_type; -- struct hlsl_ir_expr *copy; -+ struct hlsl_ir_node *copy; - unsigned int writemask = 0; - - if (assign_op == ASSIGN_OP_SUB) - { -- if (!(rhs = add_unary_arithmetic_expr(ctx, instrs, HLSL_OP1_NEG, rhs, &rhs->loc))) -+ if (!(rhs = add_unary_arithmetic_expr(ctx, block, HLSL_OP1_NEG, rhs, &rhs->loc))) - return NULL; - assign_op = ASSIGN_OP_ADD; - } -@@ -1716,17 +1707,17 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - enum hlsl_ir_expr_op op = op_from_assignment(assign_op); - - assert(op); -- if (!(rhs = add_binary_arithmetic_expr(ctx, instrs, op, lhs, rhs, &rhs->loc))) -+ if (!(rhs = add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, &rhs->loc))) - return NULL; - } - -- if (lhs_type->type <= HLSL_CLASS_LAST_NUMERIC) -+ if (lhs_type->class <= HLSL_CLASS_LAST_NUMERIC) - writemask = (1 << lhs_type->dimx) - 1; - -- if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) -+ if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) - return NULL; - -- while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_RESOURCE_LOAD) -+ while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) - { - if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) - { -@@ -1735,10 +1726,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - } - else if (lhs->type == HLSL_IR_SWIZZLE) - { -- struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs), *new_swizzle; -+ struct hlsl_ir_swizzle *swizzle = 
hlsl_ir_swizzle(lhs); - unsigned int width, s = swizzle->swizzle; -+ struct hlsl_ir_node *new_swizzle; - -- if (lhs->data_type->type == HLSL_CLASS_MATRIX) -+ if (lhs->data_type->class == HLSL_CLASS_MATRIX) - hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); - - if (!invert_swizzle(&s, &writemask, &width)) -@@ -1751,10 +1743,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - { - return NULL; - } -- list_add_tail(instrs, &new_swizzle->node.entry); -+ hlsl_block_add_instr(block, new_swizzle); - - lhs = swizzle->val.node; -- rhs = &new_swizzle->node; -+ rhs = new_swizzle; - } - else - { -@@ -1763,18 +1755,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - } - } - -- if (lhs->type == HLSL_IR_RESOURCE_LOAD) -+ if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_resource_access(hlsl_ir_index(lhs))) - { -- struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(lhs); -- struct hlsl_ir_resource_store *store; -+ struct hlsl_ir_node *coords = hlsl_ir_index(lhs)->idx.node; -+ struct hlsl_deref resource_deref; - struct hlsl_type *resource_type; -- struct hlsl_ir_swizzle *coords; -+ struct hlsl_ir_node *store; - unsigned int dim_count; - -- /* Such an lvalue was produced by an index expression. 
*/ -- assert(load->load_type == HLSL_RESOURCE_LOAD); -- resource_type = hlsl_deref_get_type(ctx, &load->resource); -- assert(resource_type->type == HLSL_CLASS_OBJECT); -+ if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, hlsl_ir_index(lhs)->val.node)) -+ return NULL; -+ -+ resource_type = hlsl_deref_get_type(ctx, &resource_deref); -+ assert(resource_type->class == HLSL_CLASS_OBJECT); - assert(resource_type->base_type == HLSL_TYPE_TEXTURE || resource_type->base_type == HLSL_TYPE_UAV); - - if (resource_type->base_type != HLSL_TYPE_UAV) -@@ -1787,25 +1780,70 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, - "Resource store expressions must write to all components."); - -- /* Remove the (implicit) mipmap level from the load expression. */ -- assert(load->coords.node->data_type->type == HLSL_CLASS_VECTOR); -- assert(load->coords.node->data_type->base_type == HLSL_TYPE_UINT); -- assert(load->coords.node->data_type->dimx == dim_count + 1); -- if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dim_count, load->coords.node, &lhs->loc))) -- return NULL; -- list_add_tail(instrs, &coords->node.entry); -+ assert(coords->data_type->class == HLSL_CLASS_VECTOR); -+ assert(coords->data_type->base_type == HLSL_TYPE_UINT); -+ assert(coords->data_type->dimx == dim_count); - -- if (!(store = hlsl_new_resource_store(ctx, &load->resource, &coords->node, rhs, &lhs->loc))) -+ if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) -+ { -+ hlsl_cleanup_deref(&resource_deref); - return NULL; -- list_add_tail(instrs, &store->node.entry); -+ } -+ hlsl_block_add_instr(block, store); -+ hlsl_cleanup_deref(&resource_deref); -+ } -+ else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) -+ { -+ struct hlsl_ir_index *row = hlsl_ir_index(lhs); -+ struct hlsl_ir_node *mat = row->val.node; -+ unsigned int i, k = 
0; -+ -+ for (i = 0; i < mat->data_type->dimx; ++i) -+ { -+ struct hlsl_ir_node *cell, *load, *store, *c; -+ struct hlsl_deref deref; -+ -+ if (!(writemask & (1 << i))) -+ continue; -+ -+ if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) -+ return NULL; -+ hlsl_block_add_instr(block, c); -+ -+ if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) -+ return NULL; -+ hlsl_block_add_instr(block, cell); -+ -+ if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) -+ return NULL; -+ -+ if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) -+ return NULL; -+ -+ if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load, 0, &rhs->loc))) -+ { -+ hlsl_cleanup_deref(&deref); -+ return NULL; -+ } -+ hlsl_block_add_instr(block, store); -+ hlsl_cleanup_deref(&deref); -+ } - } - else - { -- struct hlsl_ir_store *store; -+ struct hlsl_ir_node *store; -+ struct hlsl_deref deref; -+ -+ if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) -+ return NULL; - -- if (!(store = hlsl_new_store_index(ctx, &hlsl_ir_load(lhs)->src, NULL, rhs, writemask, &rhs->loc))) -+ if (!(store = hlsl_new_store_index(ctx, &deref, NULL, rhs, writemask, &rhs->loc))) -+ { -+ hlsl_cleanup_deref(&deref); - return NULL; -- list_add_tail(instrs, &store->node.entry); -+ } -+ hlsl_block_add_instr(block, store); -+ hlsl_cleanup_deref(&deref); - } - - /* Don't use the instruction itself as a source, as this makes structure -@@ -1813,44 +1851,44 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in - * the last instruction in the list, we do need to copy. 
*/ - if (!(copy = hlsl_new_copy(ctx, rhs))) - return NULL; -- list_add_tail(instrs, ©->node.entry); -- return ©->node; -+ hlsl_block_add_instr(block, copy); -+ return copy; - } - --static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, -- struct vkd3d_shader_location loc) -+static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool decrement, bool post, -+ const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *lhs = node_from_list(instrs); -- struct hlsl_ir_constant *one; -+ struct hlsl_ir_node *lhs = node_from_block(block); -+ struct hlsl_ir_node *one; - - if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) -- hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, - "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? "de" : "in"); - -- if (!(one = hlsl_new_int_constant(ctx, 1, &loc))) -+ if (!(one = hlsl_new_int_constant(ctx, 1, loc))) - return false; -- list_add_tail(instrs, &one->node.entry); -+ hlsl_block_add_instr(block, one); - -- if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, &one->node)) -+ if (!add_assignment(ctx, block, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) - return false; - - if (post) - { -- struct hlsl_ir_expr *copy; -+ struct hlsl_ir_node *copy; - - if (!(copy = hlsl_new_copy(ctx, lhs))) - return false; -- list_add_tail(instrs, ©->node.entry); -+ hlsl_block_add_instr(block, copy); - - /* Post increment/decrement expressions are considered const. 
*/ -- if (!(copy->node.data_type = hlsl_type_clone(ctx, copy->node.data_type, 0, HLSL_MODIFIER_CONST))) -+ if (!(copy->data_type = hlsl_type_clone(ctx, copy->data_type, 0, HLSL_MODIFIER_CONST))) - return false; - } - - return true; - } - --static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, -+static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) - { - unsigned int src_comp_count = hlsl_type_component_count(src->data_type); -@@ -1861,23 +1899,21 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, - - for (k = 0; k < src_comp_count; ++k) - { -+ struct hlsl_ir_node *conv, *load; - struct hlsl_type *dst_comp_type; -- struct hlsl_ir_store *store; -- struct hlsl_ir_load *load; -- struct hlsl_ir_node *conv; - struct hlsl_block block; - -- if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) -+ if (!(load = hlsl_add_load_component(ctx, instrs, src, k, &src->loc))) - return; - - dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - -- if (!(conv = add_implicit_conversion(ctx, instrs, &load->node, dst_comp_type, &src->loc))) -+ if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) - return; - -- if (!(store = hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv))) -+ if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) - return; -- list_move_tail(instrs, &block.instrs); -+ hlsl_block_add_block(instrs, &block); - - ++*store_index; - } -@@ -1885,12 +1921,12 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, - - static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_struct) - { -- if (type->type == HLSL_CLASS_OBJECT) -+ if (type->class == HLSL_CLASS_OBJECT) - return !must_be_in_struct; -- if (type->type == HLSL_CLASS_ARRAY) -+ if (type->class 
== HLSL_CLASS_ARRAY) - return type_has_object_components(type->e.array.type, must_be_in_struct); - -- if (type->type == HLSL_CLASS_STRUCT) -+ if (type->class == HLSL_CLASS_STRUCT) - { - unsigned int i; - -@@ -1905,12 +1941,12 @@ static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_s - - static bool type_has_numeric_components(struct hlsl_type *type) - { -- if (type->type <= HLSL_CLASS_LAST_NUMERIC) -+ if (type->class <= HLSL_CLASS_LAST_NUMERIC) - return true; -- if (type->type == HLSL_CLASS_ARRAY) -+ if (type->class == HLSL_CLASS_ARRAY) - return type_has_numeric_components(type->e.array.type); - -- if (type->type == HLSL_CLASS_STRUCT) -+ if (type->class == HLSL_CLASS_STRUCT) - { - unsigned int i; - -@@ -1923,204 +1959,231 @@ static bool type_has_numeric_components(struct hlsl_type *type) - return false; - } - --static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, -- unsigned int modifiers, const struct vkd3d_shader_location *modifiers_loc, struct list *var_list) -+static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int modifiers, -+ const struct vkd3d_shader_location *loc) - { -- struct parse_variable_def *v, *v_next; -+ modifiers &= (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); -+ if (modifiers) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_modifiers_to_string(ctx, modifiers))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ } -+} -+ -+static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) -+{ -+ struct hlsl_type *basic_type = v->basic_type; - struct hlsl_ir_function_decl *func; -- unsigned int invalid_modifiers; -- struct list *statements_list; -+ struct hlsl_semantic new_semantic; -+ uint32_t modifiers = v->modifiers; -+ bool unbounded_res_array = false; - struct hlsl_ir_var *var; - struct hlsl_type 
*type; - bool local = true; -+ char *var_name; -+ unsigned int i; - -- if (basic_type->type == HLSL_CLASS_MATRIX) -- assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ assert(basic_type); - -- if (!(statements_list = make_empty_list(ctx))) -- { -- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) -- free_parse_variable_def(v); -- vkd3d_free(var_list); -- return NULL; -- } -+ if (basic_type->class == HLSL_CLASS_MATRIX) -+ assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - -- if (!var_list) -- return statements_list; -+ type = basic_type; - -- invalid_modifiers = modifiers & (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); -- if (invalid_modifiers) -+ if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) - { -- struct vkd3d_string_buffer *string; -- -- if ((string = hlsl_modifiers_to_string(ctx, invalid_modifiers))) -- hlsl_error(ctx, modifiers_loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); -- hlsl_release_string_buffer(ctx, string); -+ for (i = 0; i < v->arrays.count; ++i) -+ unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); - } - -- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) -+ if (unbounded_res_array) - { -- bool unbounded_res_array = false; -- unsigned int i; -- -- type = basic_type; -- -- if (shader_is_sm_5_1(ctx) && type->type == HLSL_CLASS_OBJECT) -+ if (v->arrays.count == 1) - { -- for (i = 0; i < v->arrays.count; ++i) -- unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); -+ hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); -+ return; - } -- -- if (unbounded_res_array) -+ else - { -- if (v->arrays.count == 1) -- { -- hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); -- free_parse_variable_def(v); -- continue; -- } -- else -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Unbounded 
resource arrays cannot be multi-dimensional."); -- } -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Unbounded resource arrays cannot be multi-dimensional."); - } -- else -+ } -+ else -+ { -+ for (i = 0; i < v->arrays.count; ++i) - { -- for (i = 0; i < v->arrays.count; ++i) -+ if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) - { -- if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) -- { -- unsigned int size = initializer_size(&v->initializer); -- unsigned int elem_components = hlsl_type_component_count(type); -- -- if (i < v->arrays.count - 1) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Only innermost array size can be implicit."); -- free_parse_initializer(&v->initializer); -- v->initializer.args_count = 0; -- } -- else if (elem_components == 0) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Cannot declare an implicit size array of a size 0 type."); -- free_parse_initializer(&v->initializer); -- v->initializer.args_count = 0; -- } -- else if (size == 0) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Implicit size arrays need to be initialized."); -- free_parse_initializer(&v->initializer); -- v->initializer.args_count = 0; -+ unsigned int size = initializer_size(&v->initializer); -+ unsigned int elem_components = hlsl_type_component_count(type); - -- } -- else if (size % elem_components != 0) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Cannot initialize implicit size array with %u components, expected a multiple of %u.", -- size, elem_components); -- free_parse_initializer(&v->initializer); -- v->initializer.args_count = 0; -- } -- else -- { -- v->arrays.sizes[i] = size / elem_components; -- } -+ if (i < v->arrays.count - 1) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Only innermost array size can be implicit."); -+ v->initializer.args_count = 0; -+ } -+ else if 
(elem_components == 0) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Cannot declare an implicit size array of a size 0 type."); -+ v->initializer.args_count = 0; -+ } -+ else if (size == 0) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Implicit size arrays need to be initialized."); -+ v->initializer.args_count = 0; -+ } -+ else if (size % elem_components != 0) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Cannot initialize implicit size array with %u components, expected a multiple of %u.", -+ size, elem_components); -+ v->initializer.args_count = 0; -+ } -+ else -+ { -+ v->arrays.sizes[i] = size / elem_components; - } -- type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); - } -+ type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); - } -- vkd3d_free(v->arrays.sizes); -+ } - -- if (!(var = hlsl_new_var(ctx, v->name, type, v->loc, &v->semantic, modifiers, &v->reg_reservation))) -+ if (!(var_name = vkd3d_strdup(v->name))) -+ return; -+ -+ new_semantic = v->semantic; -+ if (v->semantic.name) -+ { -+ if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) - { -- free_parse_variable_def(v); -- continue; -+ vkd3d_free(var_name); -+ return; - } -+ } - -- var->buffer = ctx->cur_buffer; -+ if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) -+ { -+ hlsl_cleanup_semantic(&new_semantic); -+ vkd3d_free(var_name); -+ return; -+ } - -- if (ctx->cur_scope == ctx->globals) -- { -- local = false; -+ var->buffer = ctx->cur_buffer; - -- if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); -+ if (var->buffer == ctx->globals_buffer) -+ { -+ if (var->reg_reservation.offset_type) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, 
-+ "packoffset() is only allowed inside constant buffer declarations."); -+ } - -- /* Mark it as uniform. We need to do this here since synthetic -- * variables also get put in the global scope, but shouldn't be -- * considered uniforms, and we have no way of telling otherwise. */ -- if (!(modifiers & HLSL_STORAGE_STATIC)) -- var->storage_modifiers |= HLSL_STORAGE_UNIFORM; -+ if (ctx->cur_scope == ctx->globals) -+ { -+ local = false; - -- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && -- type_has_object_components(var->data_type, true)) -- { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Target profile doesn't support objects as struct members in uniform variables.\n"); -- } -+ if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); - -- if ((func = hlsl_get_func_decl(ctx, var->name))) -- { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, -- "'%s' is already defined as a function.", var->name); -- hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, -- "'%s' was previously defined here.", var->name); -- } -- } -- else -- { -- static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED -- | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; -+ /* Mark it as uniform. We need to do this here since synthetic -+ * variables also get put in the global scope, but shouldn't be -+ * considered uniforms, and we have no way of telling otherwise. 
*/ -+ if (!(modifiers & HLSL_STORAGE_STATIC)) -+ var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - -- if (modifiers & invalid) -- { -- struct vkd3d_string_buffer *string; -+ if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && -+ type_has_object_components(var->data_type, true)) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Target profile doesn't support objects as struct members in uniform variables."); -+ } - -- if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Modifiers '%s' are not allowed on local variables.", string->buffer); -- hlsl_release_string_buffer(ctx, string); -- } -- if (var->semantic.name) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, -- "Semantics are not allowed on local variables."); -+ if ((func = hlsl_get_func_decl(ctx, var->name))) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, -+ "'%s' is already defined as a function.", var->name); -+ hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, -+ "'%s' was previously defined here.", var->name); - } -+ } -+ else -+ { -+ static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED -+ | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; - -- if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) -- && type_has_object_components(var->data_type, false)) -+ if (modifiers & invalid) - { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Static variables cannot have both numeric and resource components."); -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Modifiers '%s' are not allowed on local variables.", string->buffer); -+ hlsl_release_string_buffer(ctx, string); - } -+ if 
(var->semantic.name) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, -+ "Semantics are not allowed on local variables."); - -- if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count -- && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) -+ if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count && !(modifiers & HLSL_STORAGE_STATIC)) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, -- "Const variable \"%s\" is missing an initializer.", var->name); -- hlsl_free_var(var); -- free_parse_initializer(&v->initializer); -- vkd3d_free(v); -- continue; -+ "Const variable \"%s\" is missing an initializer.", var->name); - } -+ } -+ -+ if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) -+ && type_has_object_components(var->data_type, false)) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Static variables cannot have both numeric and resource components."); -+ } -+ -+ if (!hlsl_add_var(ctx, var, local)) -+ { -+ struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); -+ -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, -+ "Variable \"%s\" was already declared in this scope.", var->name); -+ hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); -+ hlsl_free_var(var); -+ return; -+ } -+} - -- if (!hlsl_add_var(ctx, var, local)) -+static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) -+{ -+ struct parse_variable_def *v, *v_next; -+ struct hlsl_block *initializers; -+ struct hlsl_ir_var *var; -+ struct hlsl_type *type; -+ -+ if (!(initializers = make_empty_block(ctx))) -+ { -+ LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) - { -- struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); -+ free_parse_variable_def(v); -+ } -+ vkd3d_free(var_list); -+ return NULL; 
-+ } - -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, -- "Variable \"%s\" was already declared in this scope.", var->name); -- hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); -- hlsl_free_var(var); -- free_parse_initializer(&v->initializer); -- vkd3d_free(v); -+ LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) -+ { -+ /* If this fails, the variable failed to be declared. */ -+ if (!(var = hlsl_get_var(ctx->cur_scope, v->name))) -+ { -+ free_parse_variable_def(v); - continue; - } -+ type = var->data_type; - - if (v->initializer.args_count) - { -@@ -2135,8 +2198,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Expected %u components in initializer, but got %u.", - hlsl_type_component_count(type), size); -- free_parse_initializer(&v->initializer); -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - -@@ -2148,58 +2210,55 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t - } - else - { -- struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, var->loc); -+ struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); - - assert(v->initializer.args_count == 1); -- list_add_tail(v->initializer.instrs, &load->node.entry); -+ hlsl_block_add_instr(v->initializer.instrs, &load->node); - add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); - } - -- if (modifiers & HLSL_STORAGE_STATIC) -- list_move_tail(&ctx->static_initializers, v->initializer.instrs); -+ if (var->storage_modifiers & HLSL_STORAGE_STATIC) -+ hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); - else -- list_move_tail(statements_list, v->initializer.instrs); -- vkd3d_free(v->initializer.args); -- vkd3d_free(v->initializer.instrs); -+ hlsl_block_add_block(initializers, 
v->initializer.instrs); - } - else if (var->storage_modifiers & HLSL_STORAGE_STATIC) - { -- struct hlsl_ir_constant *zero; -- struct hlsl_ir_store *store; -- struct hlsl_ir_node *cast; -+ struct hlsl_ir_node *cast, *store, *zero; - - /* Initialize statics to zero by default. */ - - if (type_has_object_components(var->data_type, false)) - { -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - - if (!(zero = hlsl_new_uint_constant(ctx, 0, &var->loc))) - { -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } -- list_add_tail(&ctx->static_initializers, &zero->node.entry); -+ hlsl_block_add_instr(&ctx->static_initializers, zero); - -- if (!(cast = add_cast(ctx, &ctx->static_initializers, &zero->node, var->data_type, &var->loc))) -+ if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) - { -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } - - if (!(store = hlsl_new_simple_store(ctx, var, cast))) - { -- vkd3d_free(v); -+ free_parse_variable_def(v); - continue; - } -- list_add_tail(&ctx->static_initializers, &store->node.entry); -+ hlsl_block_add_instr(&ctx->static_initializers, store); - } -- vkd3d_free(v); -+ free_parse_variable_def(v); - } -+ - vkd3d_free(var_list); -- return statements_list; -+ return initializers; - } - - struct find_function_call_args -@@ -2271,34 +2330,120 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, - return args.decl; - } - --static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, -- const struct parse_initializer *params, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) -+static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) - { -- struct hlsl_type *type = arg->data_type; -- -- if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) -- return arg; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; - -- type = 
hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -- return add_implicit_conversion(ctx, params->instrs, arg, type, loc); -+ return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); - } - --static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, -- struct hlsl_type *type, const struct vkd3d_shader_location *loc) -+static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -+ const struct parse_initializer *args, const struct vkd3d_shader_location *loc) - { -+ struct hlsl_ir_node *call; - unsigned int i; - -- for (i = 0; i < params->args_count; ++i) -+ assert(args->args_count == func->parameters.count); -+ -+ for (i = 0; i < func->parameters.count; ++i) - { -- struct hlsl_ir_node *new_arg; -+ struct hlsl_ir_var *param = func->parameters.vars[i]; -+ struct hlsl_ir_node *arg = args->args[i]; - -- if (!(new_arg = add_implicit_conversion(ctx, params->instrs, params->args[i], type, loc))) -- return false; -- params->args[i] = new_arg; -+ if (!hlsl_types_are_equal(arg->data_type, param->data_type)) -+ { -+ struct hlsl_ir_node *cast; -+ -+ if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) -+ return false; -+ args->args[i] = cast; -+ arg = cast; -+ } -+ -+ if (param->storage_modifiers & HLSL_STORAGE_IN) -+ { -+ struct hlsl_ir_node *store; -+ -+ if (!(store = hlsl_new_simple_store(ctx, param, arg))) -+ return false; -+ hlsl_block_add_instr(args->instrs, store); -+ } - } - -- return true; --} -+ if (!(call = hlsl_new_call(ctx, func, loc))) -+ return false; -+ hlsl_block_add_instr(args->instrs, call); -+ -+ for (i = 0; i < func->parameters.count; ++i) -+ { -+ struct hlsl_ir_var *param = func->parameters.vars[i]; -+ struct hlsl_ir_node *arg = args->args[i]; -+ -+ if (param->storage_modifiers & HLSL_STORAGE_OUT) -+ { -+ struct hlsl_ir_load *load; -+ -+ if (arg->data_type->modifiers & HLSL_MODIFIER_CONST) -+ hlsl_error(ctx, &arg->loc, 
VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, -+ "Output argument to \"%s\" is const.", func->func->name); -+ -+ if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) -+ return false; -+ hlsl_block_add_instr(args->instrs, &load->node); -+ -+ if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) -+ return false; -+ } -+ } -+ -+ if (func->return_var) -+ { -+ struct hlsl_ir_load *load; -+ -+ if (!(load = hlsl_new_var_load(ctx, func->return_var, loc))) -+ return false; -+ hlsl_block_add_instr(args->instrs, &load->node); -+ } -+ else -+ { -+ struct hlsl_ir_node *expr; -+ -+ if (!(expr = hlsl_new_void_expr(ctx, loc))) -+ return false; -+ hlsl_block_add_instr(args->instrs, expr); -+ } -+ -+ return true; -+} -+ -+static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_type *type = arg->data_type; -+ -+ if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) -+ return arg; -+ -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ return add_implicit_conversion(ctx, params->instrs, arg, type, loc); -+} -+ -+static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, -+ struct hlsl_type *type, const struct vkd3d_shader_location *loc) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < params->args_count; ++i) -+ { -+ struct hlsl_ir_node *new_arg; -+ -+ if (!(new_arg = add_implicit_conversion(ctx, params->instrs, params->args[i], type, loc))) -+ return false; -+ params->args[i] = new_arg; -+ } -+ -+ return true; -+} - - static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -@@ -2315,12 +2460,12 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * - - base = expr_common_base_type(base, 
arg_type->base_type); - -- if (arg_type->type == HLSL_CLASS_VECTOR) -+ if (arg_type->class == HLSL_CLASS_VECTOR) - { - vectors = true; - dimx = min(dimx, arg_type->dimx); - } -- else if (arg_type->type == HLSL_CLASS_MATRIX) -+ else if (arg_type->class == HLSL_CLASS_MATRIX) - { - matrices = true; - dimx = min(dimx, arg_type->dimx); -@@ -2369,7 +2514,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, - if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) - return false; - -- type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); - - return convert_args(ctx, params, type, loc); - } -@@ -2383,32 +2528,78 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx, - static bool intrinsic_all(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *arg = params->args[0], *mul; -- struct hlsl_ir_constant *one, *zero; -- struct hlsl_ir_load *load; -+ struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load; - unsigned int i, count; - - if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) - return false; -- list_add_tail(params->instrs, &one->node.entry); -+ hlsl_block_add_instr(params->instrs, one); - - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) - return false; -- list_add_tail(params->instrs, &zero->node.entry); -+ hlsl_block_add_instr(params->instrs, zero); - -- mul = &one->node; -+ mul = one; - - count = hlsl_type_component_count(arg->data_type); - for (i = 0; i < count; ++i) - { -- if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) -+ return false; -+ -+ if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) -+ return false; -+ } -+ -+ return !!add_binary_comparison_expr(ctx, 
params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); -+} -+ -+static bool intrinsic_any(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; -+ unsigned int i, count; -+ -+ if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) -+ { -+ hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); -+ return false; -+ } -+ -+ if (arg->data_type->base_type == HLSL_TYPE_FLOAT) -+ { -+ if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, zero); -+ -+ if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) - return false; - -- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &load->node, mul, loc))) -+ return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); -+ } -+ else if (arg->data_type->base_type == HLSL_TYPE_BOOL) -+ { -+ if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) - return false; -+ hlsl_block_add_instr(params->instrs, bfalse); -+ -+ or = bfalse; -+ -+ count = hlsl_type_component_count(arg->data_type); -+ for (i = 0; i < count; ++i) -+ { -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) -+ return false; -+ -+ if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) -+ return false; -+ } -+ -+ return true; - } - -- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, &zero->node, loc); -+ hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); -+ return false; - } - - /* Find the type corresponding to the given source type, with the same -@@ -2416,7 +2607,30 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, - static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, - const struct hlsl_type *type, enum hlsl_base_type base_type) 
- { -- return hlsl_get_numeric_type(ctx, type->type, base_type, type->dimx, type->dimy); -+ return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); -+} -+ -+static bool intrinsic_asfloat(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_type *data_type; -+ -+ data_type = params->args[0]->data_type; -+ if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, data_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Wrong argument type of asfloat(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", -+ string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ } -+ data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_FLOAT); -+ -+ operands[0] = params->args[0]; -+ return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); - } - - static bool intrinsic_asuint(struct hlsl_ctx *ctx, -@@ -2469,6 +2683,34 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, max, params->args[2], loc); - } - -+static bool intrinsic_clip(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *condition, *jump; -+ -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) -+ return false; -+ -+ condition = params->args[0]; -+ -+ if (ctx->profile->major_version < 4 && hlsl_type_component_count(condition->data_type) > 4) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, condition->data_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Argument type cannot exceed 4 components, got type \"%s\".", string->buffer); -+ 
hlsl_release_string_buffer(ctx, string); -+ return false; -+ } -+ -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NEG, condition, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, jump); -+ -+ return true; -+} -+ - static bool intrinsic_cos(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -2483,7 +2725,7 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, - static bool intrinsic_cross(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_swizzle *arg1_swzl1, *arg1_swzl2, *arg2_swzl1, *arg2_swzl2; -+ struct hlsl_ir_node *arg1_swzl1, *arg1_swzl2, *arg2_swzl1, *arg2_swzl2; - struct hlsl_ir_node *arg1 = params->args[0], *arg2 = params->args[1]; - struct hlsl_ir_node *arg1_cast, *arg2_cast, *mul1_neg, *mul1, *mul2; - struct hlsl_type *cast_type; -@@ -2504,35 +2746,99 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, - - if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) - return false; -- list_add_tail(params->instrs, &arg1_swzl1->node.entry); -+ hlsl_block_add_instr(params->instrs, arg1_swzl1); - - if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) - return false; -- list_add_tail(params->instrs, &arg2_swzl1->node.entry); -+ hlsl_block_add_instr(params->instrs, arg2_swzl1); - -- if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, -- &arg1_swzl1->node, &arg2_swzl1->node, loc))) -+ if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) - return false; - -- if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, *loc))) -+ if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, loc))) - return false; -- list_add_tail(params->instrs, &mul1_neg->entry); -+ hlsl_block_add_instr(params->instrs, mul1_neg); - - if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, 
HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) - return false; -- list_add_tail(params->instrs, &arg1_swzl2->node.entry); -+ hlsl_block_add_instr(params->instrs, arg1_swzl2); - - if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) - return false; -- list_add_tail(params->instrs, &arg2_swzl2->node.entry); -+ hlsl_block_add_instr(params->instrs, arg2_swzl2); - -- if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, -- &arg1_swzl2->node, &arg2_swzl2->node, loc))) -+ if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) - return false; - - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, mul2, mul1_neg, loc); - } - -+static bool intrinsic_ddx(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); -+} -+ -+static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); -+} -+ -+static bool intrinsic_ddx_fine(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_FINE, arg, loc); -+} -+ -+static bool intrinsic_ddy(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ 
struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); -+} -+ -+static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); -+} -+ -+static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); -+} -+ - static bool intrinsic_distance(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -2565,8 +2871,7 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, - static bool intrinsic_exp(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_constant *coeff; -- struct hlsl_ir_node *arg, *mul; -+ struct hlsl_ir_node *arg, *mul, *coeff; - - if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) - return false; -@@ -2574,9 +2879,9 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, - /* 1/ln(2) */ - if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) - return false; -- list_add_tail(params->instrs, &coeff->node.entry); -+ hlsl_block_add_instr(params->instrs, coeff); - -- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &coeff->node, params->args[0], loc))) -+ if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, 
params->args[0], loc))) - return false; - - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, mul, loc); -@@ -2604,6 +2909,47 @@ static bool intrinsic_floor(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FLOOR, arg, loc); - } - -+static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer *params, -+ const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *x, *y, *div, *abs, *frac, *neg_frac, *ge, *select, *zero; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; -+ static const struct hlsl_constant_value zero_value; -+ -+ if (!(x = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ if (!(y = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) -+ return false; -+ -+ if (!(div = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, x, y, loc))) -+ return false; -+ -+ if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, zero); -+ -+ if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) -+ return false; -+ -+ if (!(frac = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FRACT, abs, loc))) -+ return false; -+ -+ if (!(neg_frac = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, frac, loc))) -+ return false; -+ -+ if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, div, zero, loc))) -+ return false; -+ -+ operands[0] = ge; -+ operands[1] = frac; -+ operands[2] = neg_frac; -+ if (!(select = add_expr(ctx, params->instrs, HLSL_OP3_TERNARY, operands, x->data_type, loc))) -+ return false; -+ -+ return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, select, y, loc); -+} -+ - static bool intrinsic_frac(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -2635,7 +2981,7 @@ static bool 
intrinsic_length(struct hlsl_ctx *ctx, - struct hlsl_type *type = params->args[0]->data_type; - struct hlsl_ir_node *arg, *dot; - -- if (type->type == HLSL_CLASS_MATRIX) -+ if (type->class == HLSL_CLASS_MATRIX) - { - struct vkd3d_string_buffer *string; - -@@ -2675,7 +3021,7 @@ static bool intrinsic_lerp(struct hlsl_ctx *ctx, - } - - static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, -- struct list *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, -+ struct hlsl_block *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *log, *mul; -@@ -2692,91 +3038,77 @@ static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, - static bool intrinsic_lit(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *n_l_neg, *n_h_neg, *specular_or, *specular_pow; -- struct hlsl_ir_constant *init, *zero; -- struct hlsl_ir_node *n_l, *n_h, *m; -- struct hlsl_ir_node *diffuse; -- struct hlsl_ir_store *store; -- struct hlsl_deref var_deref; -- struct hlsl_type *ret_type; -- struct hlsl_ir_load *load; -- struct hlsl_ir_var *var; -- struct hlsl_block block; -+ struct hlsl_ir_function_decl *func; - -- if (params->args[0]->data_type->type != HLSL_CLASS_SCALAR -- || params->args[1]->data_type->type != HLSL_CLASS_SCALAR -- || params->args[2]->data_type->type != HLSL_CLASS_SCALAR) -+ static const char body[] = -+ "float4 lit(float n_l, float n_h, float m)\n" -+ "{\n" -+ " float4 ret;\n" -+ " ret.xw = 1.0;\n" -+ " ret.y = max(n_l, 0);\n" -+ " ret.z = (n_l < 0 || n_h < 0) ? 
0 : pow(n_h, m);\n" -+ " return ret;\n" -+ "}"; -+ -+ if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR -+ || params->args[1]->data_type->class != HLSL_CLASS_SCALAR -+ || params->args[2]->data_type->class != HLSL_CLASS_SCALAR) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); - return false; - } - -- if (!(n_l = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -- -- if (!(n_h = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) -+ if (!(func = hlsl_compile_internal_function(ctx, "lit", body))) - return false; - -- if (!(m = intrinsic_float_convert_arg(ctx, params, params->args[2], loc))) -- return false; -- -- ret_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); -- -- if (!(var = hlsl_new_synthetic_var(ctx, "lit", ret_type, loc))) -- return false; -- hlsl_init_simple_deref_from_var(&var_deref, var); -+ return add_user_call(ctx, func, params, loc); -+} - -- if (!(init = hlsl_new_constant(ctx, ret_type, loc))) -- return false; -- init->value[0].f = 1.0f; -- init->value[1].f = 0.0f; -- init->value[2].f = 0.0f; -- init->value[3].f = 1.0f; -- list_add_tail(params->instrs, &init->node.entry); -+static bool intrinsic_log(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *log, *arg, *coeff; - -- if (!(store = hlsl_new_simple_store(ctx, var, &init->node))) -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) - return false; -- list_add_tail(params->instrs, &store->node.entry); - -- if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) -+ if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) - return false; -- list_add_tail(params->instrs, &zero->node.entry); - -- /* Diffuse component. 
*/ -- if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, &zero->node, loc))) -+ /* ln(2) */ -+ if (!(coeff = hlsl_new_float_constant(ctx, 0.69314718055f, loc))) - return false; - -- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse))) -- return false; -- list_move_tail(params->instrs, &block.instrs); -+ return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); -+} - -- /* Specular component. */ -- if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, -- n_h, &zero->node, loc))) -- return false; -+static bool intrinsic_log10(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *log, *arg, *coeff; - -- if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, -- n_l, &zero->node, loc))) -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) - return false; - -- if (!(specular_or = add_binary_logical_expr(ctx, params->instrs, HLSL_OP2_LOGIC_OR, n_l_neg, n_h_neg, loc))) -+ if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) - return false; - -- if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) -+ /* 1 / log2(10) */ -+ if (!(coeff = hlsl_new_float_constant(ctx, 0.301029996f, loc))) - return false; - -- if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, &zero->node, specular_pow))) -- return false; -+ return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); -+} - -- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 2, &load->node))) -- return false; -- list_move_tail(params->instrs, &block.instrs); -+static bool intrinsic_log2(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; - -- if (!(load = hlsl_new_var_load(ctx, var, *loc))) -+ if 
(!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) - return false; -- list_add_tail(params->instrs, &load->node.entry); - -- return true; -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); - } - - static bool intrinsic_max(struct hlsl_ctx *ctx, -@@ -2808,15 +3140,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; - -- if (arg1->data_type->type == HLSL_CLASS_SCALAR || arg2->data_type->type == HLSL_CLASS_SCALAR) -+ if (arg1->data_type->class == HLSL_CLASS_SCALAR || arg2->data_type->class == HLSL_CLASS_SCALAR) - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1, arg2, loc); - -- if (arg1->data_type->type == HLSL_CLASS_VECTOR) -+ if (arg1->data_type->class == HLSL_CLASS_VECTOR) - { - vect_count++; - cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->dimx, 1); - } -- if (arg2->data_type->type == HLSL_CLASS_VECTOR) -+ if (arg2->data_type->class == HLSL_CLASS_VECTOR) - { - vect_count++; - cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->dimx); -@@ -2854,21 +3186,21 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - for (j = 0; j < matrix_type->dimy; ++j) - { - struct hlsl_ir_node *instr = NULL; -- struct hlsl_ir_store *store; - struct hlsl_block block; - - for (k = 0; k < cast_type1->dimx && k < cast_type2->dimy; ++k) - { -- struct hlsl_ir_load *value1, *value2; -- struct hlsl_ir_node *mul; -+ struct hlsl_ir_node *value1, *value2, *mul; - -- if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) -+ if (!(value1 = hlsl_add_load_component(ctx, params->instrs, -+ cast1, j * cast1->data_type->dimx + k, loc))) - return false; - -- if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) -+ if (!(value2 = hlsl_add_load_component(ctx, params->instrs, -+ cast2, k * cast2->data_type->dimx + i, loc))) - return false; - -- if 
(!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &value1->node, &value2->node, loc))) -+ if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) - return false; - - if (instr) -@@ -2882,15 +3214,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - } - } - -- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr))) -+ if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) - return false; -- list_move_tail(params->instrs, &block.instrs); -+ hlsl_block_add_block(params->instrs, &block); - } - } - -- if (!(load = hlsl_new_var_load(ctx, var, *loc))) -+ if (!(load = hlsl_new_var_load(ctx, var, loc))) - return false; -- list_add_tail(params->instrs, &load->node.entry); -+ hlsl_block_add_instr(params->instrs, &load->node); - - return !!add_implicit_conversion(ctx, params->instrs, &load->node, ret_type, loc); - } -@@ -2901,7 +3233,7 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, - struct hlsl_type *type = params->args[0]->data_type; - struct hlsl_ir_node *dot, *rsq, *arg; - -- if (type->type == HLSL_CLASS_MATRIX) -+ if (type->class == HLSL_CLASS_MATRIX) - { - struct vkd3d_string_buffer *string; - -@@ -2986,74 +3318,80 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, arg, loc); - } - --static bool intrinsic_sin(struct hlsl_ctx *ctx, -+static bool intrinsic_sign(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *arg; -- -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -- return false; -- -- return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); --} -+ struct hlsl_ir_node *lt, *neg, *op1, *op2, *zero, *arg = params->args[0]; -+ static const struct hlsl_constant_value zero_value; - --/* smoothstep(a, b, x) = p^2 
(3 - 2p), where p = saturate((x - a)/(b - a)) */ --static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, -- const struct parse_initializer *params, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res; -- struct hlsl_ir_constant *one, *minus_two, *three; -+ struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, -+ arg->data_type->dimx, arg->data_type->dimy); - -- if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) -+ if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) - return false; -+ hlsl_block_add_instr(params->instrs, zero); - -- min_arg = params->args[0]; -- max_arg = params->args[1]; -- x_arg = params->args[2]; -+ /* Check if 0 < arg, cast bool to int */ - -- if (!(min_arg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, min_arg, loc))) -+ if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, zero, arg, loc))) - return false; - -- if (!(p_num = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, x_arg, min_arg, loc))) -+ if (!(op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) - return false; - -- if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, max_arg, min_arg, loc))) -- return false; -+ /* Check if arg < 0, cast bool to int and invert (meaning true is -1) */ - -- if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) -+ if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, arg, zero, loc))) - return false; -- list_add_tail(params->instrs, &one->node.entry); - -- if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, &one->node, p_denom, loc))) -+ if (!(op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) - return false; - -- if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p_num, p_denom, loc))) -+ if 
(!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, op2, loc))) - return false; - -- if (!(p = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, p, loc))) -- return false; -+ /* Adding these two together will make 1 when > 0, -1 when < 0, and 0 when neither */ -+ return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, neg, op1, loc); -+} - -- if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) -- return false; -- list_add_tail(params->instrs, &minus_two->node.entry); -+static bool intrinsic_sin(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; - -- if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) - return false; -- list_add_tail(params->instrs, &three->node.entry); - -- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &minus_two->node, p, loc))) -- return false; -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); -+} - -- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, &three->node, res, loc))) -- return false; -+/* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a)) */ -+static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_type *type; -+ char *body; -+ -+ static const char template[] = -+ "%s smoothstep(%s low, %s high, %s x)\n" -+ "{\n" -+ " %s p = saturate((x - low) / (high - low));\n" -+ " return (p * p) * (3 - 2 * p);\n" -+ "}"; - -- if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, p, loc))) -+ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) - return false; -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); 
- -- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, res, loc))) -+ if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name, type->name))) -+ return false; -+ func = hlsl_compile_internal_function(ctx, "smoothstep", body); -+ vkd3d_free(body); -+ if (!func) - return false; - -- return true; -+ return add_user_call(ctx, func, params, loc); - } - - static bool intrinsic_sqrt(struct hlsl_ctx *ctx, -@@ -3081,7 +3419,7 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, - return false; - - type = ge->data_type; -- type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); - return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); - } - -@@ -3090,9 +3428,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - { - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; - const struct hlsl_type *sampler_type; -- struct hlsl_ir_resource_load *load; -- struct hlsl_ir_load *sampler_load; -- struct hlsl_ir_node *coords; -+ struct hlsl_ir_node *coords, *load; - - if (params->args_count != 2 && params->args_count != 4) - { -@@ -3103,11 +3439,11 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - - if (params->args_count == 4) - { -- hlsl_fixme(ctx, loc, "Samples with gradients are not implemented.\n"); -+ hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); - } - - sampler_type = params->args[0]->data_type; -- if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -+ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) - { - struct vkd3d_string_buffer *string; -@@ -3118,27 +3454,63 @@ static bool 
intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - name, ctx->builtin_types.sampler[dim]->name, string->buffer); - hlsl_release_string_buffer(ctx, string); - } -- else -- { -- /* Only HLSL_IR_LOAD can return an object. */ -- sampler_load = hlsl_ir_load(params->args[0]); -- -- load_params.resource = sampler_load->src; -- } - - if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) -- coords = params->args[1]; -+ { -+ return false; -+ } -+ -+ /* tex1D() functions never produce 1D resource declarations. For newer profiles half offset -+ is used for the second coordinate, while older ones appear to replicate first coordinate.*/ -+ if (dim == HLSL_SAMPLER_DIM_1D) -+ { -+ struct hlsl_ir_load *load; -+ struct hlsl_ir_node *half; -+ struct hlsl_ir_var *var; -+ unsigned int idx = 0; -+ -+ if (!(var = hlsl_new_synthetic_var(ctx, "coords", hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 2), loc))) -+ return false; -+ -+ initialize_var_components(ctx, params->instrs, var, &idx, coords); -+ if (shader_profile_version_ge(ctx, 4, 0)) -+ { -+ if (!(half = hlsl_new_float_constant(ctx, 0.5f, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, half); -+ -+ initialize_var_components(ctx, params->instrs, var, &idx, half); -+ } -+ else -+ initialize_var_components(ctx, params->instrs, var, &idx, coords); -+ -+ if (!(load = hlsl_new_var_load(ctx, var, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, &load->node); -+ -+ coords = &load->node; -+ -+ dim = HLSL_SAMPLER_DIM_2D; -+ } - - load_params.coords = coords; -+ load_params.resource = params->args[0]; - load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); -+ load_params.sampling_dim = dim; - - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -- list_add_tail(params->instrs, &load->node.entry); -+ hlsl_block_add_instr(params->instrs, load); - return 
true; - } - -+static bool intrinsic_tex1D(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex1D", HLSL_SAMPLER_DIM_1D); -+} -+ - static bool intrinsic_tex2D(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3151,32 +3523,39 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); - } - -+static bool intrinsic_texCUBE(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); -+} -+ - static bool intrinsic_transpose(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *arg = params->args[0]; - struct hlsl_type *arg_type = arg->data_type; -+ struct hlsl_ir_load *var_load; - struct hlsl_deref var_deref; - struct hlsl_type *mat_type; -- struct hlsl_ir_load *load; -+ struct hlsl_ir_node *load; - struct hlsl_ir_var *var; - unsigned int i, j; - -- if (arg_type->type != HLSL_CLASS_SCALAR && arg_type->type != HLSL_CLASS_MATRIX) -+ if (arg_type->class != HLSL_CLASS_SCALAR && arg_type->class != HLSL_CLASS_MATRIX) - { - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, arg_type))) - hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.\n", -+ "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.", - string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } - -- if (arg_type->type == HLSL_CLASS_SCALAR) -+ if (arg_type->class == HLSL_CLASS_SCALAR) - { -- list_add_tail(params->instrs, &arg->entry); -+ hlsl_block_add_instr(params->instrs, arg); - return 
true; - } - -@@ -3190,21 +3569,75 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, - { - for (j = 0; j < arg_type->dimy; ++j) - { -- struct hlsl_ir_store *store; - struct hlsl_block block; - -- if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) -+ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) - return false; - -- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, &load->node))) -+ if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) - return false; -- list_move_tail(params->instrs, &block.instrs); -+ hlsl_block_add_block(params->instrs, &block); -+ } -+ } -+ -+ if (!(var_load = hlsl_new_var_load(ctx, var, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, &var_load->node); -+ -+ return true; -+} -+ -+static bool intrinsic_trunc(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, arg, loc); -+} -+ -+static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg = params->args[0], *ret, *c, *swizzle; -+ struct hlsl_type *arg_type = arg->data_type; -+ -+ if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR && arg_type->dimx == 4)) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, arg_type))) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Wrong argument type '%s'.", string->buffer); -+ hlsl_release_string_buffer(ctx, string); - } -+ -+ return false; -+ } -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, 
params, arg, loc))) -+ return false; -+ -+ if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, c); -+ -+ if (arg_type->class == HLSL_CLASS_VECTOR) -+ { -+ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, swizzle); -+ -+ arg = swizzle; - } - -- if (!(load = hlsl_new_var_load(ctx, var, *loc))) -+ if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) - return false; -- list_add_tail(params->instrs, &load->node.entry); -+ -+ if (shader_profile_version_ge(ctx, 4, 0)) -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); - - return true; - } -@@ -3220,22 +3653,36 @@ static const struct intrinsic_function - intrinsic_functions[] = - { - /* Note: these entries should be kept in alphabetical order. */ -+ {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, - {"abs", 1, true, intrinsic_abs}, - {"all", 1, true, intrinsic_all}, -+ {"any", 1, true, intrinsic_any}, -+ {"asfloat", 1, true, intrinsic_asfloat}, - {"asuint", -1, true, intrinsic_asuint}, - {"clamp", 3, true, intrinsic_clamp}, -+ {"clip", 1, true, intrinsic_clip}, - {"cos", 1, true, intrinsic_cos}, - {"cross", 2, true, intrinsic_cross}, -+ {"ddx", 1, true, intrinsic_ddx}, -+ {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, -+ {"ddx_fine", 1, true, intrinsic_ddx_fine}, -+ {"ddy", 1, true, intrinsic_ddy}, -+ {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, -+ {"ddy_fine", 1, true, intrinsic_ddy_fine}, - {"distance", 2, true, intrinsic_distance}, - {"dot", 2, true, intrinsic_dot}, - {"exp", 1, true, intrinsic_exp}, - {"exp2", 1, true, intrinsic_exp2}, - {"floor", 1, true, intrinsic_floor}, -+ {"fmod", 2, true, intrinsic_fmod}, - {"frac", 1, true, intrinsic_frac}, - {"ldexp", 2, true, intrinsic_ldexp}, - {"length", 1, true, intrinsic_length}, - {"lerp", 3, true, intrinsic_lerp}, - {"lit", 3, 
true, intrinsic_lit}, -+ {"log", 1, true, intrinsic_log}, -+ {"log10", 1, true, intrinsic_log10}, -+ {"log2", 1, true, intrinsic_log2}, - {"max", 2, true, intrinsic_max}, - {"min", 2, true, intrinsic_min}, - {"mul", 2, true, intrinsic_mul}, -@@ -3245,13 +3692,17 @@ intrinsic_functions[] = - {"round", 1, true, intrinsic_round}, - {"rsqrt", 1, true, intrinsic_rsqrt}, - {"saturate", 1, true, intrinsic_saturate}, -+ {"sign", 1, true, intrinsic_sign}, - {"sin", 1, true, intrinsic_sin}, - {"smoothstep", 3, true, intrinsic_smoothstep}, - {"sqrt", 1, true, intrinsic_sqrt}, - {"step", 2, true, intrinsic_step}, -+ {"tex1D", -1, false, intrinsic_tex1D}, - {"tex2D", -1, false, intrinsic_tex2D}, - {"tex3D", -1, false, intrinsic_tex3D}, -+ {"texCUBE", -1, false, intrinsic_texCUBE}, - {"transpose", 1, true, intrinsic_transpose}, -+ {"trunc", 1, true, intrinsic_trunc}, - }; - - static int intrinsic_function_name_compare(const void *a, const void *b) -@@ -3261,7 +3712,7 @@ static int intrinsic_function_name_compare(const void *a, const void *b) - return strcmp(a, func->name); - } - --static struct list *add_call(struct hlsl_ctx *ctx, const char *name, -+static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, - struct parse_initializer *args, const struct vkd3d_shader_location *loc) - { - struct intrinsic_function *intrinsic; -@@ -3269,79 +3720,8 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - - if ((decl = find_function_call(ctx, name, args, loc))) - { -- struct hlsl_ir_node *call; -- unsigned int i; -- -- assert(args->args_count == decl->parameters.count); -- -- for (i = 0; i < decl->parameters.count; ++i) -- { -- struct hlsl_ir_var *param = decl->parameters.vars[i]; -- struct hlsl_ir_node *arg = args->args[i]; -- -- if (!hlsl_types_are_equal(arg->data_type, param->data_type)) -- { -- struct hlsl_ir_node *cast; -- -- if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) -- goto fail; -- args->args[i] = cast; -- 
arg = cast; -- } -- -- if (param->storage_modifiers & HLSL_STORAGE_IN) -- { -- struct hlsl_ir_store *store; -- -- if (!(store = hlsl_new_simple_store(ctx, param, arg))) -- goto fail; -- list_add_tail(args->instrs, &store->node.entry); -- } -- } -- -- if (!(call = hlsl_new_call(ctx, decl, loc))) -+ if (!add_user_call(ctx, decl, args, loc)) - goto fail; -- list_add_tail(args->instrs, &call->entry); -- -- for (i = 0; i < decl->parameters.count; ++i) -- { -- struct hlsl_ir_var *param = decl->parameters.vars[i]; -- struct hlsl_ir_node *arg = args->args[i]; -- -- if (param->storage_modifiers & HLSL_STORAGE_OUT) -- { -- struct hlsl_ir_load *load; -- -- if (arg->data_type->modifiers & HLSL_MODIFIER_CONST) -- hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, -- "Output argument to \"%s\" is const.", decl->func->name); -- -- if (!(load = hlsl_new_var_load(ctx, param, arg->loc))) -- goto fail; -- list_add_tail(args->instrs, &load->node.entry); -- -- if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) -- goto fail; -- } -- } -- -- if (decl->return_var) -- { -- struct hlsl_ir_load *load; -- -- if (!(load = hlsl_new_var_load(ctx, decl->return_var, *loc))) -- goto fail; -- list_add_tail(args->instrs, &load->node.entry); -- } -- else -- { -- struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -- struct hlsl_ir_node *expr; -- -- if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc))) -- goto fail; -- list_add_tail(args->instrs, &expr->entry); -- } - } - else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), - sizeof(*intrinsic_functions), intrinsic_function_name_compare))) -@@ -3360,7 +3740,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, - - for (i = 0; i < args->args_count; ++i) - { -- if (args->args[i]->data_type->type > HLSL_CLASS_LAST_NUMERIC) -+ if (args->args[i]->data_type->class > HLSL_CLASS_LAST_NUMERIC) - { - struct vkd3d_string_buffer 
*string; - -@@ -3396,21 +3776,21 @@ fail: - return NULL; - } - --static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, -- struct parse_initializer *params, struct vkd3d_shader_location loc) -+static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, -+ struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; - unsigned int i, idx = 0; - -- if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, &loc))) -+ if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) - return NULL; - - for (i = 0; i < params->args_count; ++i) - { - struct hlsl_ir_node *arg = params->args[i]; - -- if (arg->data_type->type == HLSL_CLASS_OBJECT) -+ if (arg->data_type->class == HLSL_CLASS_OBJECT) - { - struct vkd3d_string_buffer *string; - -@@ -3426,7 +3806,7 @@ static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -- list_add_tail(params->instrs, &load->node.entry); -+ hlsl_block_add_instr(params->instrs, &load->node); - - vkd3d_free(params->args); - return params->instrs; -@@ -3455,320 +3835,704 @@ static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) - } - } - --static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, -+static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct hlsl_type *object_type, -+ const char *method, const struct vkd3d_shader_location *loc) -+{ -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, object_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, -+ "Method '%s' is not defined on type '%s'.", method, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return false; -+} -+ -+static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node 
*object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -- struct hlsl_ir_load *object_load; -+ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -+ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -+ struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; -+ struct hlsl_ir_node *load; -+ bool multisampled; -+ -+ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBE -+ || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBEARRAY) -+ { -+ return raise_invalid_method_object_type(ctx, object_type, name, loc); -+ } - -- if (object_type->type != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE -- || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) -+ multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -+ || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; -+ -+ if (params->args_count < 1 + multisampled || params->args_count > 3 + multisampled) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method 'Load': expected between %u and %u, but got %u.", -+ 1 + multisampled, 3 + multisampled, params->args_count); -+ return false; -+ } -+ if (multisampled) -+ { -+ if (!(load_params.sample_index = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) -+ return false; -+ } -+ -+ assert(offset_dim); -+ if (params->args_count > 1 + multisampled) -+ { -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[1 + multisampled], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -+ return false; -+ } -+ if (params->args_count > 2 + multisampled) -+ { -+ hlsl_fixme(ctx, loc, "Tiled resource status argument."); -+ } -+ -+ /* +1 for the mipmap level for non-multisampled textures */ -+ if 
(!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc))) -+ return false; -+ -+ load_params.format = object_type->e.resource_format; -+ load_params.resource = object; -+ -+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, load); -+ return true; -+} -+ -+static bool add_sample_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ const struct hlsl_type *object_type = object->data_type; -+ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -+ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -+ struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; -+ const struct hlsl_type *sampler_type; -+ struct hlsl_ir_node *load; -+ -+ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -+ || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) -+ { -+ return raise_invalid_method_object_type(ctx, object_type, name, loc); -+ } -+ -+ if (params->args_count < 2 || params->args_count > 4 + !!offset_dim) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method 'Sample': expected from 2 to %u, but got %u.", -+ 4 + !!offset_dim, params->args_count); -+ return false; -+ } -+ -+ sampler_type = params->args[0]->data_type; -+ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -+ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; - -- if ((string = hlsl_type_to_string(ctx, object_type))) -+ if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Type '%s' does not have methods.", 
string->buffer); -+ "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return false; -+ } -+ -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ return false; -+ -+ if (offset_dim && params->args_count > 2) -+ { -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -+ return false; -+ } -+ -+ if (params->args_count > 2 + !!offset_dim) -+ hlsl_fixme(ctx, loc, "Sample() clamp parameter."); -+ if (params->args_count > 3 + !!offset_dim) -+ hlsl_fixme(ctx, loc, "Tiled resource status argument."); -+ -+ load_params.format = object_type->e.resource_format; -+ load_params.resource = object; -+ load_params.sampler = params->args[0]; -+ -+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, load); -+ -+ return true; -+} -+ -+static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ const struct hlsl_type *object_type = object->data_type; -+ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -+ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -+ struct hlsl_resource_load_params load_params = { 0 }; -+ const struct hlsl_type *sampler_type; -+ struct hlsl_ir_node *load; -+ -+ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -+ || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) -+ { -+ return raise_invalid_method_object_type(ctx, object_type, name, loc); -+ } -+ -+ if (!strcmp(name, "SampleCmpLevelZero")) -+ load_params.type = HLSL_RESOURCE_SAMPLE_CMP_LZ; -+ else -+ load_params.type = 
HLSL_RESOURCE_SAMPLE_CMP; -+ -+ if (params->args_count < 3 || params->args_count > 5 + !!offset_dim) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", -+ name, 5 + !!offset_dim, params->args_count); -+ return false; -+ } -+ -+ sampler_type = params->args[0]->data_type; -+ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -+ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, sampler_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Wrong type for argument 0 of %s(): expected 'SamplerComparisonState', but got '%s'.", -+ name, string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } - -- /* Only HLSL_IR_LOAD can return an object. */ -- object_load = hlsl_ir_load(object); -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ return false; -+ -+ if (!(load_params.cmp = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) -+ load_params.cmp = params->args[2]; -+ -+ if (offset_dim && params->args_count > 3) -+ { -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -+ return false; -+ } -+ -+ if (params->args_count > 3 + !!offset_dim) -+ hlsl_fixme(ctx, loc, "%s() clamp parameter.", name); -+ if (params->args_count > 4 + !!offset_dim) -+ hlsl_fixme(ctx, loc, "Tiled resource status argument."); -+ -+ load_params.format = object_type->e.resource_format; -+ load_params.resource = object; -+ load_params.sampler = params->args[0]; -+ -+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ 
hlsl_block_add_instr(block, load); -+ -+ return true; -+} -+ -+static bool add_gather_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ const struct hlsl_type *object_type = object->data_type; -+ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -+ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -+ struct hlsl_resource_load_params load_params = {0}; -+ const struct hlsl_type *sampler_type; -+ struct hlsl_ir_node *load; -+ unsigned int read_channel; - -- if (!strcmp(name, "Load") -+ if (object_type->sampler_dim != HLSL_SAMPLER_DIM_2D -+ && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DARRAY - && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBE - && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBEARRAY) - { -- const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -- const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -- struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; -- struct hlsl_ir_resource_load *load; -- bool multisampled; -+ return raise_invalid_method_object_type(ctx, object_type, name, loc); -+ } - -- multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -- || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; -+ if (!strcmp(name, "GatherGreen")) -+ { -+ load_params.type = HLSL_RESOURCE_GATHER_GREEN; -+ read_channel = 1; -+ } -+ else if (!strcmp(name, "GatherBlue")) -+ { -+ load_params.type = HLSL_RESOURCE_GATHER_BLUE; -+ read_channel = 2; -+ } -+ else if (!strcmp(name, "GatherAlpha")) -+ { -+ load_params.type = HLSL_RESOURCE_GATHER_ALPHA; -+ read_channel = 3; -+ } -+ else -+ { -+ load_params.type = HLSL_RESOURCE_GATHER_RED; -+ read_channel = 0; -+ } - -- if (params->args_count < 1 + multisampled || params->args_count > 3 + multisampled) -+ if 
(!strcmp(name, "Gather") || !offset_dim) -+ { -+ if (params->args_count < 2 || params->args_count > 3 + !!offset_dim) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Wrong number of arguments to method 'Load': expected between %u and %u, but got %u.", -- 1 + multisampled, 3 + multisampled, params->args_count); -+ "Wrong number of arguments to method '%s': expected from 2 to %u, but got %u.", -+ name, 3 + !!offset_dim, params->args_count); - return false; - } -- if (multisampled) -- { -- hlsl_fixme(ctx, loc, "Load() sampling index parameter."); -- } -+ } -+ else if (params->args_count < 2 || params->args_count == 5 || params->args_count > 7) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method '%s': expected 2, 3, 4, 6 or 7, but got %u.", -+ name, params->args_count); -+ return false; -+ } - -- assert(offset_dim); -- if (params->args_count > 1 + multisampled) -- { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], -- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -- return false; -- } -- if (params->args_count > 2 + multisampled) -- { -- hlsl_fixme(ctx, loc, "Tiled resource status argument."); -- } -+ if (params->args_count == 3 + !!offset_dim || params->args_count == 7) -+ hlsl_fixme(ctx, loc, "Tiled resource status argument."); - -- /* +1 for the mipmap level */ -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], -- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + 1), loc))) -+ if (params->args_count == 6 || params->args_count == 7) -+ { -+ hlsl_fixme(ctx, loc, "Multiple %s() offset parameters.", name); -+ } -+ else if (offset_dim && params->args_count > 2) -+ { -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; -+ } - -- load_params.format = 
object_type->e.resource_format; -- load_params.resource = object_load->src; -+ sampler_type = params->args[0]->data_type; -+ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -+ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) -+ { -+ struct vkd3d_string_buffer *string; - -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- list_add_tail(instrs, &load->node.entry); -+ if ((string = hlsl_type_to_string(ctx, sampler_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Wrong type for argument 1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return false; -+ } -+ -+ if (read_channel >= object_type->e.resource_format->dimx) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Method %s() requires at least %u channels.", name, read_channel + 1); -+ return false; -+ } -+ -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ return false; -+ -+ load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); -+ load_params.resource = object; -+ load_params.sampler = params->args[0]; -+ -+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, load); -+ return true; -+} -+ -+static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *dest, -+ struct hlsl_ir_node *src, unsigned int component, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *load; -+ -+ if (!dest) - return true; -+ -+ if (!(load = hlsl_add_load_component(ctx, instrs, src, component, loc))) -+ return false; -+ -+ if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load)) -+ return false; -+ -+ return true; -+} -+ -+static bool add_getdimensions_method_call(struct 
hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ const struct hlsl_type *object_type = object->data_type; -+ bool uint_resinfo, has_uint_arg, has_float_arg; -+ struct hlsl_resource_load_params load_params; -+ struct hlsl_ir_node *sample_info, *res_info; -+ struct hlsl_ir_node *zero = NULL, *void_ret; -+ struct hlsl_type *uint_type, *float_type; -+ unsigned int i, j; -+ enum func_argument -+ { -+ ARG_MIP_LEVEL, -+ ARG_WIDTH, -+ ARG_HEIGHT, -+ ARG_ELEMENT_COUNT, -+ ARG_LEVEL_COUNT, -+ ARG_SAMPLE_COUNT, -+ ARG_MAX_ARGS, -+ }; -+ struct hlsl_ir_node *args[ARG_MAX_ARGS] = { 0 }; -+ static const struct overload -+ { -+ enum hlsl_sampler_dim sampler_dim; -+ unsigned int args_count; -+ enum func_argument args[ARG_MAX_ARGS]; - } -- else if (!strcmp(name, "Sample") -- && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS -- && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) -+ overloads[] = - { -- const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -- const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -- struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; -- const struct hlsl_type *sampler_type; -- struct hlsl_ir_resource_load *load; -- struct hlsl_ir_load *sampler_load; -+ { HLSL_SAMPLER_DIM_1D, 1, { ARG_WIDTH } }, -+ { HLSL_SAMPLER_DIM_1D, 3, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_1DARRAY, 2, { ARG_WIDTH, ARG_ELEMENT_COUNT } }, -+ { HLSL_SAMPLER_DIM_1DARRAY, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_2D, 2, { ARG_WIDTH, ARG_HEIGHT } }, -+ { HLSL_SAMPLER_DIM_2D, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_2DARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, -+ { HLSL_SAMPLER_DIM_2DARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, 
ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_3D, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, -+ { HLSL_SAMPLER_DIM_3D, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_CUBE, 2, { ARG_WIDTH, ARG_HEIGHT } }, -+ { HLSL_SAMPLER_DIM_CUBE, 4, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_CUBEARRAY, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT } }, -+ { HLSL_SAMPLER_DIM_CUBEARRAY, 5, { ARG_MIP_LEVEL, ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_LEVEL_COUNT } }, -+ { HLSL_SAMPLER_DIM_2DMS, 3, { ARG_WIDTH, ARG_HEIGHT, ARG_SAMPLE_COUNT } }, -+ { HLSL_SAMPLER_DIM_2DMSARRAY, 4, { ARG_WIDTH, ARG_HEIGHT, ARG_ELEMENT_COUNT, ARG_SAMPLE_COUNT } }, -+ }; -+ const struct overload *o = NULL; -+ -+ if (object_type->sampler_dim > HLSL_SAMPLER_DIM_LAST_TEXTURE) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "GetDimensions() is not defined for this type."); -+ } -+ -+ uint_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT); -+ float_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT); -+ has_uint_arg = has_float_arg = false; -+ for (i = 0; i < ARRAY_SIZE(overloads); ++i) -+ { -+ const struct overload *iter = &overloads[i]; - -- if (params->args_count < 2 || params->args_count > 4 + !!offset_dim) -+ if (iter->sampler_dim == object_type->sampler_dim && iter->args_count == params->args_count) - { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Wrong number of arguments to method 'Sample': expected from 2 to %u, but got %u.", -- 4 + !!offset_dim, params->args_count); -- return false; -+ for (j = 0; j < params->args_count; ++j) -+ { -+ args[iter->args[j]] = params->args[j]; -+ -+ /* Input parameter. 
*/ -+ if (iter->args[j] == ARG_MIP_LEVEL) -+ { -+ if (!(args[ARG_MIP_LEVEL] = add_implicit_conversion(ctx, block, args[ARG_MIP_LEVEL], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -+ { -+ return false; -+ } -+ -+ continue; -+ } -+ -+ has_float_arg |= hlsl_types_are_equal(params->args[j]->data_type, float_type); -+ has_uint_arg |= hlsl_types_are_equal(params->args[j]->data_type, uint_type); -+ -+ if (params->args[j]->data_type->class != HLSL_CLASS_SCALAR) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected scalar arguments."); -+ break; -+ } -+ } -+ o = iter; -+ break; - } -+ } -+ uint_resinfo = !has_float_arg && has_uint_arg; - -- sampler_type = params->args[0]->data_type; -- if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) -- { -- struct vkd3d_string_buffer *string; -+ if (!o) -+ { -+ struct vkd3d_string_buffer *string; - -- if ((string = hlsl_type_to_string(ctx, sampler_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); -+ if ((string = hlsl_type_to_string(ctx, object_type))) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Unexpected number of arguments %u for %s.%s().", params->args_count, string->buffer, name); - hlsl_release_string_buffer(ctx, string); -- return false; - } -+ } -+ -+ if (!args[ARG_MIP_LEVEL]) -+ { -+ if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) -+ return false; -+ hlsl_block_add_instr(block, zero); -+ args[ARG_MIP_LEVEL] = zero; -+ } -+ -+ memset(&load_params, 0, sizeof(load_params)); -+ load_params.type = HLSL_RESOURCE_RESINFO; -+ load_params.resource = object; -+ load_params.lod = args[ARG_MIP_LEVEL]; -+ load_params.format = hlsl_get_vector_type(ctx, uint_resinfo ? 
HLSL_TYPE_UINT : HLSL_TYPE_FLOAT, 4); -+ -+ if (!(res_info = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, res_info); -+ -+ if (!add_assignment_from_component(ctx, block, args[ARG_WIDTH], res_info, 0, loc)) -+ return false; -+ -+ if (!add_assignment_from_component(ctx, block, args[ARG_HEIGHT], res_info, 1, loc)) -+ return false; -+ -+ if (!add_assignment_from_component(ctx, block, args[ARG_ELEMENT_COUNT], res_info, -+ object_type->sampler_dim == HLSL_SAMPLER_DIM_1DARRAY ? 1 : 2, loc)) -+ { -+ return false; -+ } - -- /* Only HLSL_IR_LOAD can return an object. */ -- sampler_load = hlsl_ir_load(params->args[0]); -+ if (!add_assignment_from_component(ctx, block, args[ARG_LEVEL_COUNT], res_info, 3, loc)) -+ return false; - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ if (args[ARG_SAMPLE_COUNT]) -+ { -+ memset(&load_params, 0, sizeof(load_params)); -+ load_params.type = HLSL_RESOURCE_SAMPLE_INFO; -+ load_params.resource = object; -+ load_params.format = args[ARG_SAMPLE_COUNT]->data_type; -+ if (!(sample_info = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; -+ hlsl_block_add_instr(block, sample_info); - -- if (offset_dim && params->args_count > 2) -- { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], -- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -- return false; -- } -+ if (!add_assignment(ctx, block, args[ARG_SAMPLE_COUNT], ASSIGN_OP_ASSIGN, sample_info)) -+ return false; -+ } -+ -+ if (!(void_ret = hlsl_new_void_expr(ctx, loc))) -+ return false; -+ hlsl_block_add_instr(block, void_ret); -+ -+ return true; -+} -+ -+static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ 
const struct hlsl_type *object_type = object->data_type; -+ struct hlsl_resource_load_params load_params = { 0 }; -+ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -+ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -+ const struct hlsl_type *sampler_type; -+ struct hlsl_ir_node *load; -+ -+ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -+ || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) -+ { -+ return raise_invalid_method_object_type(ctx, object_type, name, loc); -+ } -+ -+ if (!strcmp(name, "SampleLevel")) -+ load_params.type = HLSL_RESOURCE_SAMPLE_LOD; -+ else -+ load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; -+ -+ if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", -+ name, 4 + !!offset_dim, params->args_count); -+ return false; -+ } -+ -+ sampler_type = params->args[0]->data_type; -+ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -+ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, sampler_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Wrong type for argument 0 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return false; -+ } - -- if (params->args_count > 2 + !!offset_dim) -- hlsl_fixme(ctx, loc, "Sample() clamp parameter."); -- if (params->args_count > 3 + !!offset_dim) -- hlsl_fixme(ctx, loc, "Tiled resource status argument."); -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ load_params.coords = params->args[1]; - -- load_params.format = 
object_type->e.resource_format; -- load_params.resource = object_load->src; -- load_params.sampler = sampler_load->src; -+ if (!(load_params.lod = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) -+ load_params.lod = params->args[2]; - -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -+ if (offset_dim && params->args_count > 3) -+ { -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[3], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; -- list_add_tail(instrs, &load->node.entry); -- -- return true; - } -- else if ((!strcmp(name, "Gather") || !strcmp(name, "GatherRed") || !strcmp(name, "GatherBlue") -- || !strcmp(name, "GatherGreen") || !strcmp(name, "GatherAlpha")) -- && (object_type->sampler_dim == HLSL_SAMPLER_DIM_2D -- || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DARRAY -- || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBE -- || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBEARRAY)) -- { -- const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -- const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -- struct hlsl_resource_load_params load_params = {0}; -- const struct hlsl_type *sampler_type; -- struct hlsl_ir_resource_load *load; -- struct hlsl_ir_load *sampler_load; -- unsigned int read_channel; - -- if (!strcmp(name, "GatherGreen")) -- { -- load_params.type = HLSL_RESOURCE_GATHER_GREEN; -- read_channel = 1; -- } -- else if (!strcmp(name, "GatherBlue")) -- { -- load_params.type = HLSL_RESOURCE_GATHER_BLUE; -- read_channel = 2; -- } -- else if (!strcmp(name, "GatherAlpha")) -- { -- load_params.type = HLSL_RESOURCE_GATHER_ALPHA; -- read_channel = 3; -- } -- else -- { -- load_params.type = HLSL_RESOURCE_GATHER_RED; -- read_channel = 0; -- } -+ if (params->args_count > 3 + !!offset_dim) -+ hlsl_fixme(ctx, loc, "Tiled resource status argument."); - -- if 
(!strcmp(name, "Gather") || !offset_dim) -- { -- if (params->args_count < 2 || params->args_count > 3 + !!offset_dim) -- { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Wrong number of arguments to method '%s': expected from 2 to %u, but got %u.", -- name, 3 + !!offset_dim, params->args_count); -- return false; -- } -- } -- else if (params->args_count < 2 || params->args_count == 5 || params->args_count > 7) -- { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Wrong number of arguments to method '%s': expected 2, 3, 4, 6 or 7, but got %u.", -- name, params->args_count); -- return false; -- } -+ load_params.format = object_type->e.resource_format; -+ load_params.resource = object; -+ load_params.sampler = params->args[0]; - -- if (params->args_count == 3 + !!offset_dim || params->args_count == 7) -- hlsl_fixme(ctx, loc, "Tiled resource status argument."); -+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, load); -+ return true; -+} - -- if (params->args_count == 6 || params->args_count == 7) -- { -- hlsl_fixme(ctx, loc, "Multiple %s() offset parameters.", name); -- } -- else if (offset_dim && params->args_count > 2) -- { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], -- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -- return false; -- } -+static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ const struct hlsl_type *object_type = object->data_type; -+ struct hlsl_resource_load_params load_params = { 0 }; -+ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -+ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -+ const struct hlsl_type *sampler_type; -+ struct 
hlsl_ir_node *load; - -- sampler_type = params->args[0]->data_type; -- if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) -- { -- struct vkd3d_string_buffer *string; -+ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -+ || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) -+ { -+ return raise_invalid_method_object_type(ctx, object_type, name, loc); -+ } - -- if ((string = hlsl_type_to_string(ctx, sampler_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Wrong type for argument 1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); -- hlsl_release_string_buffer(ctx, string); -- return false; -- } -+ load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; - -- if (read_channel >= object_type->e.resource_format->dimx) -- { -+ if (params->args_count < 4 || params->args_count > 5 + !!offset_dim) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method '%s': expected from 4 to %u, but got %u.", -+ name, 5 + !!offset_dim, params->args_count); -+ return false; -+ } -+ -+ sampler_type = params->args[0]->data_type; -+ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -+ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Method %s() requires at least %u channels.", name, read_channel + 1); -- return false; -- } -+ "Wrong type for argument 0 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return false; -+ } - -- /* Only HLSL_IR_LOAD can return an object. 
*/ -- sampler_load = hlsl_ir_load(params->args[0]); -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ load_params.coords = params->args[1]; - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- return false; -+ if (!(load_params.ddx = add_implicit_conversion(ctx, block, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ load_params.ddx = params->args[2]; - -- load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); -- load_params.resource = object_load->src; -- load_params.sampler = sampler_load->src; -+ if (!(load_params.ddy = add_implicit_conversion(ctx, block, params->args[3], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ load_params.ddy = params->args[3]; - -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -+ if (offset_dim && params->args_count > 4) -+ { -+ if (!(load_params.texel_offset = add_implicit_conversion(ctx, block, params->args[4], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; -- list_add_tail(instrs, &load->node.entry); -- return true; - } -- else if (!strcmp(name, "SampleLevel") -- && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS -- && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) -- { -- struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE_LOD}; -- const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); -- const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); -- const struct hlsl_type *sampler_type; -- struct hlsl_ir_resource_load *load; -- struct hlsl_ir_load *sampler_load; - -- if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) -- { -- hlsl_error(ctx, loc, 
VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Wrong number of arguments to method 'SampleLevel': expected from 3 to %u, but got %u.", -- 4 + !!offset_dim, params->args_count); -- return false; -- } -+ if (params->args_count > 4 + !!offset_dim) -+ hlsl_fixme(ctx, loc, "Tiled resource status argument."); - -- sampler_type = params->args[0]->data_type; -- if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER -- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) -- { -- struct vkd3d_string_buffer *string; -+ load_params.format = object_type->e.resource_format; -+ load_params.resource = object; -+ load_params.sampler = params->args[0]; - -- if ((string = hlsl_type_to_string(ctx, sampler_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Wrong type for argument 0 of SampleLevel(): expected 'sampler', but got '%s'.", string->buffer); -- hlsl_release_string_buffer(ctx, string); -- return false; -- } -+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, load); -+ return true; -+} -+ -+static const struct method_function -+{ -+ const char *name; -+ bool (*handler)(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); -+} -+object_methods[] = -+{ -+ { "Gather", add_gather_method_call }, -+ { "GatherAlpha", add_gather_method_call }, -+ { "GatherBlue", add_gather_method_call }, -+ { "GatherGreen", add_gather_method_call }, -+ { "GatherRed", add_gather_method_call }, - -- /* Only HLSL_IR_LOAD can return an object. 
*/ -- sampler_load = hlsl_ir_load(params->args[0]); -+ { "GetDimensions", add_getdimensions_method_call }, - -- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -- load_params.coords = params->args[1]; -+ { "Load", add_load_method_call }, - -- if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], -- hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) -- load_params.lod = params->args[2]; -+ { "Sample", add_sample_method_call }, -+ { "SampleBias", add_sample_lod_method_call }, -+ { "SampleCmp", add_sample_cmp_method_call }, -+ { "SampleCmpLevelZero", add_sample_cmp_method_call }, -+ { "SampleGrad", add_sample_grad_method_call }, -+ { "SampleLevel", add_sample_lod_method_call }, -+}; - -- if (offset_dim && params->args_count > 3) -- { -- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], -- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) -- return false; -- } -+static int object_method_function_name_compare(const void *a, const void *b) -+{ -+ const struct method_function *func = b; - -- if (params->args_count > 3 + !!offset_dim) -- hlsl_fixme(ctx, loc, "Tiled resource status argument."); -+ return strcmp(a, func->name); -+} - -- load_params.format = object_type->e.resource_format; -- load_params.resource = object_load->src; -- load_params.sampler = sampler_load->src; -+static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ const struct hlsl_type *object_type = object->data_type; -+ const struct method_function *method; - -- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -- return false; -- list_add_tail(instrs, &load->node.entry); -- return true; -- } -- else -+ if (object_type->class != HLSL_CLASS_OBJECT || 
object_type->base_type != HLSL_TYPE_TEXTURE -+ || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, object_type))) -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, -- "Method '%s' is not defined on type '%s'.", name, string->buffer); -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Type '%s' does not have methods.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } -+ -+ if ((method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), -+ sizeof(*method), object_method_function_name_compare))) -+ { -+ return method->handler(ctx, block, object, name, params, loc); -+ } -+ else -+ { -+ return raise_invalid_method_object_type(ctx, object_type, name, loc); -+ } - } - - static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type *format, - const struct vkd3d_shader_location *loc) - { -- if (format->type > HLSL_CLASS_VECTOR) -+ if (format->class > HLSL_CLASS_VECTOR) - { - struct vkd3d_string_buffer *string; - -@@ -3800,6 +4564,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - char *name; - DWORD modifiers; - struct hlsl_ir_node *instr; -+ struct hlsl_block *block; - struct list *list; - struct parse_fields fields; - struct parse_function function; -@@ -3846,6 +4611,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %token KW_NAMESPACE - %token KW_NOINTERPOLATION - %token KW_OUT -+%token KW_PACKOFFSET - %token KW_PASS - %token KW_PIXELSHADER - %token KW_PRECISE -@@ -3854,6 +4620,8 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %token KW_RETURN - %token KW_REGISTER - %token KW_ROW_MAJOR -+%token KW_RWBUFFER -+%token KW_RWSTRUCTUREDBUFFER - %token KW_RWTEXTURE1D - %token KW_RWTEXTURE2D - %token KW_RWTEXTURE3D -@@ -3924,37 +4692,9 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, 
struct hlsl_type - %token C_INTEGER - %token PRE_LINE - --%type add_expr --%type assignment_expr --%type bitand_expr --%type bitor_expr --%type bitxor_expr --%type compound_statement --%type conditional_expr --%type declaration --%type declaration_statement --%type equality_expr --%type expr --%type expr_optional --%type expr_statement --%type initializer_expr --%type jump_statement --%type logicand_expr --%type logicor_expr --%type loop_statement --%type mul_expr --%type postfix_expr --%type primary_expr --%type relational_expr --%type selection_statement --%type shift_expr --%type statement --%type statement_list --%type struct_declaration - %type type_specs --%type unary_expr - %type variables_def --%type variables_def_optional -+%type variables_def_typed - - %token VAR_IDENTIFIER - %token NEW_IDENTIFIER -@@ -3968,6 +4708,36 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %type attribute - - %type attribute_list -+%type attribute_list_optional -+ -+%type add_expr -+%type assignment_expr -+%type bitand_expr -+%type bitor_expr -+%type bitxor_expr -+%type compound_statement -+%type conditional_expr -+%type declaration -+%type declaration_statement -+%type equality_expr -+%type expr -+%type expr_optional -+%type expr_statement -+%type initializer_expr -+%type jump_statement -+%type logicand_expr -+%type logicor_expr -+%type loop_statement -+%type mul_expr -+%type postfix_expr -+%type primary_expr -+%type relational_expr -+%type shift_expr -+%type selection_statement -+%type statement -+%type statement_list -+%type struct_declaration_without_vars -+%type unary_expr - - %type boolean - -@@ -3999,6 +4769,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %type parameters - - %type register_opt -+%type packoffset_opt - - %type texture_type texture_ms_type uav_type - -@@ -4015,6 +4786,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type - %type type_spec - %type 
variable_decl - %type variable_def -+%type variable_def_typed - - %% - -@@ -4024,9 +4796,9 @@ hlsl_prog: - | hlsl_prog buffer_declaration buffer_body - | hlsl_prog declaration_statement - { -- if (!list_empty($2)) -+ if (!list_empty(&$2->instrs)) - hlsl_fixme(ctx, &@2, "Uniform initializer."); -- destroy_instr_list($2); -+ destroy_block($2); - } - | hlsl_prog preproc_directive - | hlsl_prog ';' -@@ -4037,7 +4809,7 @@ buffer_declaration: - if ($3.semantic.name) - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); - -- if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, @2))) -+ if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, &@2))) - YYABORT; - } - -@@ -4083,25 +4855,19 @@ preproc_directive: - } - } - --struct_declaration: -- var_modifiers struct_spec variables_def_optional ';' -+struct_declaration_without_vars: -+ var_modifiers struct_spec ';' - { -- struct hlsl_type *type; -- unsigned int modifiers = $1; -+ if (!$2->name) -+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Anonymous struct type must declare a variable."); - -- if (!$3) -- { -- if (!$2->name) -- hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "Anonymous struct type must declare a variable."); -- if (modifiers) -- hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -- "Modifiers are not allowed on struct type declarations."); -- } -+ if ($1) -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Modifiers are not allowed on struct type declarations."); - -- if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; -- $$ = declare_vars(ctx, type, modifiers, &@1, $3); - } - - struct_spec: -@@ -4208,7 +4974,7 @@ attribute: - YYABORT; - } - $$->name = $2; -- list_init(&$$->instrs); -+ hlsl_block_init(&$$->instrs); - $$->loc = @$; - $$->args_count = 0; - } -@@ -4223,8 +4989,8 @@ attribute: 
- YYABORT; - } - $$->name = $2; -- list_init(&$$->instrs); -- list_move_tail(&$$->instrs, $4.instrs); -+ hlsl_block_init(&$$->instrs); -+ hlsl_block_add_block(&$$->instrs, $4.instrs); - vkd3d_free($4.instrs); - $$->loc = @$; - $$->args_count = $4.args_count; -@@ -4261,6 +5027,14 @@ attribute_list: - $$.attrs[$$.count++] = $2; - } - -+attribute_list_optional: -+ %empty -+ { -+ $$.count = 0; -+ $$.attrs = NULL; -+ } -+ | attribute_list -+ - func_declaration: - func_prototype compound_statement - { -@@ -4272,15 +5046,15 @@ func_declaration: - "Function \"%s\" is already defined.", decl->func->name); - hlsl_note(ctx, &decl->loc, VKD3D_SHADER_LOG_ERROR, - "\"%s\" was previously defined here.", decl->func->name); -- hlsl_free_instr_list($2); -+ destroy_block($2); - } - else - { - size_t i; - - decl->has_body = true; -- list_move_tail(&decl->body.instrs, $2); -- vkd3d_free($2); -+ hlsl_block_add_block(&decl->body, $2); -+ destroy_block($2); - - /* Semantics are taken from whichever definition has a body. - * We can't just replace the hlsl_ir_var pointers, though: if -@@ -4331,6 +5105,9 @@ func_prototype_no_attrs: - struct hlsl_ir_var *var; - struct hlsl_type *type; - -+ /* Functions are unconditionally inlined. 
*/ -+ modifiers &= ~HLSL_MODIFIER_INLINE; -+ - if (modifiers & ~HLSL_MODIFIERS_MAJORITY_MASK) - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Only majority modifiers are allowed on functions."); -@@ -4349,8 +5126,11 @@ func_prototype_no_attrs: - "Semantics are not allowed on void functions."); - } - -- if ($7.reg_reservation.type) -+ if ($7.reg_reservation.reg_type) - FIXME("Unexpected register reservation for a function.\n"); -+ if ($7.reg_reservation.offset_type) -+ hlsl_error(ctx, &@5, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "packoffset() is not allowed on functions."); - - if (($$.decl = get_func_decl(&ctx->functions, $3, &$5))) - { -@@ -4454,7 +5234,7 @@ func_prototype: - compound_statement: - '{' '}' - { -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; - } - | '{' scope_start statement_list '}' -@@ -4476,17 +5256,24 @@ var_identifier: - colon_attribute: - %empty - { -- $$.semantic.name = NULL; -- $$.reg_reservation.type = 0; -+ $$.semantic = (struct hlsl_semantic){0}; -+ $$.reg_reservation.reg_type = 0; -+ $$.reg_reservation.offset_type = 0; - } - | semantic - { - $$.semantic = $1; -- $$.reg_reservation.type = 0; -+ $$.reg_reservation.reg_type = 0; -+ $$.reg_reservation.offset_type = 0; - } - | register_opt - { -- $$.semantic.name = NULL; -+ $$.semantic = (struct hlsl_semantic){0}; -+ $$.reg_reservation = $1; -+ } -+ | packoffset_opt -+ { -+ $$.semantic = (struct hlsl_semantic){0}; - $$.reg_reservation = $1; - } - -@@ -4499,6 +5286,9 @@ semantic: - ; - $$.name = $2; - $$.index = atoi(p); -+ $$.reported_missing = false; -+ $$.reported_duplicated_output_next_index = 0; -+ $$.reported_duplicated_input_incompatible_next_index = 0; - *p = 0; - } - -@@ -4518,6 +5308,21 @@ register_opt: - vkd3d_free($6); - } - -+packoffset_opt: -+ ':' KW_PACKOFFSET '(' any_identifier ')' -+ { -+ $$ = parse_packoffset(ctx, $4, NULL, &@$); -+ -+ vkd3d_free($4); -+ } -+ | ':' KW_PACKOFFSET '(' any_identifier '.' 
any_identifier ')' -+ { -+ $$ = parse_packoffset(ctx, $4, $6, &@$); -+ -+ vkd3d_free($4); -+ vkd3d_free($6); -+ } -+ - parameters: - scope_start - { -@@ -4536,7 +5341,7 @@ param_list: - parameter - { - memset(&$$, 0, sizeof($$)); -- if (!add_func_parameter(ctx, &$$, &$1, @1)) -+ if (!add_func_parameter(ctx, &$$, &$1, &@1)) - { - ERR("Error adding function parameter %s.\n", $1.name); - YYABORT; -@@ -4545,7 +5350,7 @@ param_list: - | param_list ',' parameter - { - $$ = $1; -- if (!add_func_parameter(ctx, &$$, &$3, @3)) -+ if (!add_func_parameter(ctx, &$$, &$3, &@3)) - { - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_REDEFINED, - "Parameter \"%s\" is already declared.", $3.name); -@@ -4624,7 +5429,15 @@ texture_ms_type: - } - - uav_type: -- KW_RWTEXTURE1D -+ KW_RWBUFFER -+ { -+ $$ = HLSL_SAMPLER_DIM_BUFFER; -+ } -+ | KW_RWSTRUCTUREDBUFFER -+ { -+ $$ = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER; -+ } -+ | KW_RWTEXTURE1D - { - $$ = HLSL_SAMPLER_DIM_1D; - } -@@ -4640,7 +5453,7 @@ uav_type: - type_no_void: - KW_VECTOR '<' type ',' C_INTEGER '>' - { -- if ($3->type != HLSL_CLASS_SCALAR) -+ if ($3->class != HLSL_CLASS_SCALAR) - { - struct vkd3d_string_buffer *string; - -@@ -4667,7 +5480,7 @@ type_no_void: - } - | KW_MATRIX '<' type ',' C_INTEGER ',' C_INTEGER '>' - { -- if ($3->type != HLSL_CLASS_SCALAR) -+ if ($3->class != HLSL_CLASS_SCALAR) - { - struct vkd3d_string_buffer *string; - -@@ -4702,6 +5515,10 @@ type_no_void: - { - $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_GENERIC]; - } -+ | KW_SAMPLERCOMPARISONSTATE -+ { -+ $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_COMPARISON]; -+ } - | KW_SAMPLER1D - { - $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_1D]; -@@ -4716,7 +5533,7 @@ type_no_void: - } - | KW_SAMPLERCUBE - { -- $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_3D]; -+ $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_CUBE]; - } - | KW_TEXTURE - { -@@ -4735,28 +5552,68 @@ type_no_void: - { - validate_texture_format_type(ctx, $3, &@3); - -- /* TODO: unspecified 
sample count is not allowed for all targets */ -+ if (shader_profile_version_lt(ctx, 4, 1)) -+ { -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); -+ } -+ - $$ = hlsl_new_texture_type(ctx, $1, $3, 0); - } - | texture_ms_type '<' type ',' shift_expr '>' - { -- unsigned int sample_count = evaluate_static_expression(node_from_list($5)); -- destroy_instr_list($5); -+ unsigned int sample_count; -+ struct hlsl_block block; -+ -+ hlsl_block_init(&block); -+ hlsl_block_add_block(&block, $5); -+ -+ sample_count = evaluate_static_expression_as_uint(ctx, &block, &@5); -+ -+ hlsl_block_cleanup(&block); -+ -+ vkd3d_free($5); - - $$ = hlsl_new_texture_type(ctx, $1, $3, sample_count); - } - | uav_type '<' type '>' - { -- if ($3->type > HLSL_CLASS_VECTOR) -- { -- struct vkd3d_string_buffer *string; -+ struct vkd3d_string_buffer *string = hlsl_type_to_string(ctx, $3); - -- string = hlsl_type_to_string(ctx, $3); -+ if (!type_contains_only_numerics($3)) -+ { - if (string) - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "UAV data type %s is not scalar or vector.", string->buffer); -- hlsl_release_string_buffer(ctx, string); -+ "UAV type %s is not numeric.", string->buffer); -+ } -+ -+ switch ($1) -+ { -+ case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_1D: -+ case HLSL_SAMPLER_DIM_2D: -+ case HLSL_SAMPLER_DIM_3D: -+ if ($3->class == HLSL_CLASS_ARRAY) -+ { -+ if (string) -+ hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "This type of UAV does not support array type."); -+ } -+ else if (hlsl_type_component_count($3) > 4) -+ { -+ if (string) -+ hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "UAV data type %s size exceeds maximum size.", string->buffer); -+ } -+ break; -+ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: -+ break; -+ default: -+ vkd3d_unreachable(); - } -+ -+ hlsl_release_string_buffer(ctx, string); -+ - $$ = 
hlsl_new_uav_type(ctx, $1, $3); - } - | TYPE_IDENTIFIER -@@ -4764,7 +5621,7 @@ type_no_void: - $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); - if ($$->is_minimum_precision) - { -- if (ctx->profile->major_version < 4) -+ if (shader_profile_version_lt(ctx, 4, 0)) - { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Target profile doesn't support minimum-precision types."); -@@ -4779,7 +5636,7 @@ type_no_void: - | KW_STRUCT TYPE_IDENTIFIER - { - $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); -- if ($$->type != HLSL_CLASS_STRUCT) -+ if ($$->class != HLSL_CLASS_STRUCT) - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "\"%s\" redefined as a structure.", $2); - vkd3d_free($2); - } -@@ -4793,10 +5650,10 @@ type: - - declaration_statement: - declaration -- | struct_declaration -+ | struct_declaration_without_vars - | typedef - { -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; - } - -@@ -4855,22 +5712,11 @@ type_spec: - } - - declaration: -- var_modifiers type variables_def ';' -+ variables_def_typed ';' - { -- struct hlsl_type *type; -- unsigned int modifiers = $1; -- -- if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) -+ if (!($$ = initialize_vars(ctx, $1))) - YYABORT; -- $$ = declare_vars(ctx, type, modifiers, &@1, $3); -- } -- --variables_def_optional: -- %empty -- { -- $$ = NULL; - } -- | variables_def - - variables_def: - variable_def -@@ -4885,6 +5731,33 @@ variables_def: - list_add_tail($$, &$3->entry); - } - -+variables_def_typed: -+ variable_def_typed -+ { -+ if (!($$ = make_empty_list(ctx))) -+ YYABORT; -+ list_add_head($$, &$1->entry); -+ -+ declare_var(ctx, $1); -+ } -+ | variables_def_typed ',' variable_def -+ { -+ struct parse_variable_def *head_def; -+ -+ assert(!list_empty($1)); -+ head_def = LIST_ENTRY(list_head($1), struct parse_variable_def, entry); -+ -+ assert(head_def->basic_type); -+ $3->basic_type = head_def->basic_type; -+ $3->modifiers = head_def->modifiers; 
-+ $3->modifiers_loc = head_def->modifiers_loc; -+ -+ declare_var(ctx, $3); -+ -+ $$ = $1; -+ list_add_tail($$, &$3->entry); -+ } -+ - variable_decl: - any_identifier arrays colon_attribute - { -@@ -4900,7 +5773,7 @@ state: - any_identifier '=' expr ';' - { - vkd3d_free($1); -- hlsl_free_instr_list($3); -+ destroy_block($3); - } - - state_block_start: -@@ -4926,6 +5799,38 @@ variable_def: - ctx->in_state_block = 0; - } - -+variable_def_typed: -+ var_modifiers struct_spec variable_def -+ { -+ unsigned int modifiers = $1; -+ struct hlsl_type *type; -+ -+ if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) -+ YYABORT; -+ -+ check_invalid_in_out_modifiers(ctx, modifiers, &@1); -+ -+ $$ = $3; -+ $$->basic_type = type; -+ $$->modifiers = modifiers; -+ $$->modifiers_loc = @1; -+ } -+ | var_modifiers type variable_def -+ { -+ unsigned int modifiers = $1; -+ struct hlsl_type *type; -+ -+ if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) -+ YYABORT; -+ -+ check_invalid_in_out_modifiers(ctx, modifiers, &@1); -+ -+ $$ = $3; -+ $$->basic_type = type; -+ $$->modifiers = modifiers; -+ $$->modifiers_loc = @1; -+ } -+ - arrays: - %empty - { -@@ -4934,10 +5839,12 @@ arrays: - } - | '[' expr ']' arrays - { -- unsigned int size = evaluate_static_expression(node_from_list($2)); - uint32_t *new_array; -+ unsigned int size; - -- destroy_instr_list($2); -+ size = evaluate_static_expression_as_uint(ctx, $2, &@2); -+ -+ destroy_block($2); - - $$ = $4; - -@@ -4988,59 +5895,63 @@ var_modifiers: - } - | KW_EXTERN var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_EXTERN, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_EXTERN, &@1); - } - | KW_NOINTERPOLATION var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOINTERPOLATION, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOINTERPOLATION, &@1); - } - | KW_PRECISE var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, @1); -+ $$ = add_modifiers(ctx, $2, 
HLSL_MODIFIER_PRECISE, &@1); - } - | KW_SHARED var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, &@1); - } - | KW_GROUPSHARED var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_GROUPSHARED, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_GROUPSHARED, &@1); - } - | KW_STATIC var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_STATIC, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_STATIC, &@1); - } - | KW_UNIFORM var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_UNIFORM, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_UNIFORM, &@1); - } - | KW_VOLATILE var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_VOLATILE, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_VOLATILE, &@1); - } - | KW_CONST var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_CONST, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_CONST, &@1); - } - | KW_ROW_MAJOR var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_ROW_MAJOR, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_ROW_MAJOR, &@1); - } - | KW_COLUMN_MAJOR var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_COLUMN_MAJOR, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_COLUMN_MAJOR, &@1); - } - | KW_IN var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN, &@1); - } - | KW_OUT var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_OUT, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_OUT, &@1); - } - | KW_INOUT var_modifiers - { -- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, @1); -+ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, &@1); -+ } -+ | KW_INLINE var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); - } - - -@@ -5050,10 +5961,10 @@ complex_initializer: - $$.args_count = 1; - if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) - { -- 
destroy_instr_list($1); -+ destroy_block($1); - YYABORT; - } -- $$.args[0] = node_from_list($1); -+ $$.args[0] = node_from_block($1); - $$.instrs = $1; - $$.braces = false; - } -@@ -5085,7 +5996,7 @@ complex_initializer_list: - $$.args = new_args; - for (i = 0; i < $3.args_count; ++i) - $$.args[$$.args_count++] = $3.args[i]; -- list_move_tail($$.instrs, $3.instrs); -+ hlsl_block_add_block($$.instrs, $3.instrs); - free_parse_initializer(&$3); - } - -@@ -5098,10 +6009,10 @@ initializer_expr_list: - $$.args_count = 1; - if (!($$.args = hlsl_alloc(ctx, sizeof(*$$.args)))) - { -- destroy_instr_list($1); -+ destroy_block($1); - YYABORT; - } -- $$.args[0] = node_from_list($1); -+ $$.args[0] = node_from_block($1); - $$.instrs = $1; - $$.braces = false; - } -@@ -5113,13 +6024,13 @@ initializer_expr_list: - if (!(new_args = hlsl_realloc(ctx, $$.args, ($$.args_count + 1) * sizeof(*$$.args)))) - { - free_parse_initializer(&$$); -- destroy_instr_list($3); -+ destroy_block($3); - YYABORT; - } - $$.args = new_args; -- $$.args[$$.args_count++] = node_from_list($3); -- list_move_tail($$.instrs, $3); -- vkd3d_free($3); -+ $$.args[$$.args_count++] = node_from_block($3); -+ hlsl_block_add_block($$.instrs, $3); -+ destroy_block($3); - } - - boolean: -@@ -5137,8 +6048,8 @@ statement_list: - | statement_list statement - { - $$ = $1; -- list_move_tail($$, $2); -- vkd3d_free($2); -+ hlsl_block_add_block($$, $2); -+ destroy_block($2); - } - - statement: -@@ -5152,80 +6063,116 @@ statement: - jump_statement: - KW_RETURN expr ';' - { -- if (!add_return(ctx, $2, node_from_list($2), @1)) -- YYABORT; - $$ = $2; -+ if (!add_return(ctx, $$, node_from_block($$), &@1)) -+ YYABORT; - } - | KW_RETURN ';' - { -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) -+ YYABORT; -+ if (!add_return(ctx, $$, NULL, &@1)) - YYABORT; -- if (!add_return(ctx, $$, NULL, @1)) -+ } -+ | KW_DISCARD ';' -+ { -+ struct hlsl_ir_node *discard, *c; -+ -+ if (!($$ = make_empty_block(ctx))) - YYABORT; -+ 
-+ if (!(c = hlsl_new_uint_constant(ctx, ~0u, &@1))) -+ return false; -+ hlsl_block_add_instr($$, c); -+ -+ if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NZ, c, &@1))) -+ return false; -+ hlsl_block_add_instr($$, discard); - } - - selection_statement: -- KW_IF '(' expr ')' if_body -+ attribute_list_optional KW_IF '(' expr ')' if_body - { -- struct hlsl_ir_node *condition = node_from_list($3); -- struct hlsl_ir_if *instr; -+ struct hlsl_ir_node *condition = node_from_block($4); -+ const struct parse_attribute_list *attributes = &$1; -+ struct hlsl_ir_node *instr; -+ unsigned int i; -+ -+ if (attribute_list_has_duplicates(attributes)) -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); -+ -+ for (i = 0; i < attributes->count; ++i) -+ { -+ const struct hlsl_attribute *attr = attributes->attrs[i]; -+ -+ if (!strcmp(attr->name, "branch") -+ || !strcmp(attr->name, "flatten")) -+ { -+ hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, "Unhandled attribute '%s'.", attr->name); -+ } -+ else -+ { -+ hlsl_warning(ctx, &@1, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, "Unrecognized attribute '%s'.", attr->name); -+ } -+ } - -- if (!(instr = hlsl_new_if(ctx, condition, @1))) -+ if (!(instr = hlsl_new_if(ctx, condition, $6.then_block, $6.else_block, &@2))) -+ { -+ destroy_block($6.then_block); -+ destroy_block($6.else_block); - YYABORT; -- list_move_tail(&instr->then_instrs.instrs, $5.then_instrs); -- if ($5.else_instrs) -- list_move_tail(&instr->else_instrs.instrs, $5.else_instrs); -- vkd3d_free($5.then_instrs); -- vkd3d_free($5.else_instrs); -+ } -+ destroy_block($6.then_block); -+ destroy_block($6.else_block); - if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) - { - struct vkd3d_string_buffer *string; - - if ((string = hlsl_type_to_string(ctx, condition->data_type))) -- hlsl_error(ctx, &instr->node.loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ hlsl_error(ctx, &instr->loc, 
VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "if condition type %s is not scalar.", string->buffer); - hlsl_release_string_buffer(ctx, string); - } -- $$ = $3; -- list_add_tail($$, &instr->node.entry); -+ $$ = $4; -+ hlsl_block_add_instr($$, instr); - } - - if_body: - statement - { -- $$.then_instrs = $1; -- $$.else_instrs = NULL; -+ $$.then_block = $1; -+ $$.else_block = NULL; - } - | statement KW_ELSE statement - { -- $$.then_instrs = $1; -- $$.else_instrs = $3; -+ $$.then_block = $1; -+ $$.else_block = $3; - } - - loop_statement: -- KW_WHILE '(' expr ')' statement -+ attribute_list_optional KW_WHILE '(' expr ')' statement - { -- $$ = create_loop(ctx, LOOP_WHILE, NULL, $3, NULL, $5, @1); -+ $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $4, NULL, $6, &@2); - } -- | KW_DO statement KW_WHILE '(' expr ')' ';' -+ | attribute_list_optional KW_DO statement KW_WHILE '(' expr ')' ';' - { -- $$ = create_loop(ctx, LOOP_DO_WHILE, NULL, $5, NULL, $2, @1); -+ $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $6, NULL, $3, &@2); - } -- | KW_FOR '(' scope_start expr_statement expr_statement expr_optional ')' statement -+ | attribute_list_optional KW_FOR '(' scope_start expr_statement expr_statement expr_optional ')' statement - { -- $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); -+ $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); - hlsl_pop_scope(ctx); - } -- | KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement -+ | attribute_list_optional KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement - { -- $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); -+ $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); - hlsl_pop_scope(ctx); - } - - expr_optional: - %empty - { -- if (!($$ = make_empty_list(ctx))) -+ if (!($$ = make_empty_block(ctx))) - YYABORT; - } - | expr -@@ -5241,7 +6188,7 @@ func_arguments: - { - $$.args = NULL; - $$.args_count = 0; -- if (!($$.instrs = make_empty_list(ctx))) -+ if (!($$.instrs = 
make_empty_block(ctx))) - YYABORT; - $$.braces = false; - } -@@ -5250,31 +6197,31 @@ func_arguments: - primary_expr: - C_FLOAT - { -- struct hlsl_ir_constant *c; -+ struct hlsl_ir_node *c; - - if (!(c = hlsl_new_float_constant(ctx, $1, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, &c->node))) -+ if (!($$ = make_block(ctx, c))) - YYABORT; - } - | C_INTEGER - { -- struct hlsl_ir_constant *c; -+ struct hlsl_ir_node *c; - - if (!(c = hlsl_new_int_constant(ctx, $1, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, &c->node))) -+ if (!($$ = make_block(ctx, c))) - YYABORT; - } - | boolean - { -- struct hlsl_ir_constant *c; -+ struct hlsl_ir_node *c; - - if (!(c = hlsl_new_bool_constant(ctx, $1, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, &c->node))) -+ if (!($$ = make_block(ctx, c))) - { -- hlsl_free_instr(&c->node); -+ hlsl_free_instr(c); - YYABORT; - } - } -@@ -5288,9 +6235,9 @@ primary_expr: - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Variable \"%s\" is not defined.", $1); - YYABORT; - } -- if (!(load = hlsl_new_var_load(ctx, var, @1))) -+ if (!(load = hlsl_new_var_load(ctx, var, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, &load->node))) -+ if (!($$ = make_block(ctx, &load->node))) - YYABORT; - } - | '(' expr ')' -@@ -5316,9 +6263,9 @@ primary_expr: - if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr", - hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) - YYABORT; -- if (!(load = hlsl_new_var_load(ctx, var, @1))) -+ if (!(load = hlsl_new_var_load(ctx, var, &@1))) - YYABORT; -- if (!($$ = make_list(ctx, &load->node))) -+ if (!($$ = make_block(ctx, &load->node))) - YYABORT; - } - else -@@ -5332,27 +6279,27 @@ postfix_expr: - primary_expr - | postfix_expr OP_INC - { -- if (!add_increment(ctx, $1, false, true, @2)) -+ if (!add_increment(ctx, $1, false, true, &@2)) - { -- destroy_instr_list($1); -+ destroy_block($1); - YYABORT; - } - $$ = $1; - } - | postfix_expr OP_DEC - { -- if (!add_increment(ctx, $1, true, true, @2)) -+ if 
(!add_increment(ctx, $1, true, true, &@2)) - { -- destroy_instr_list($1); -+ destroy_block($1); - YYABORT; - } - $$ = $1; - } - | postfix_expr '.' any_identifier - { -- struct hlsl_ir_node *node = node_from_list($1); -+ struct hlsl_ir_node *node = node_from_block($1); - -- if (node->data_type->type == HLSL_CLASS_STRUCT) -+ if (node->data_type->class == HLSL_CLASS_STRUCT) - { - struct hlsl_type *type = node->data_type; - const struct hlsl_struct_field *field; -@@ -5365,20 +6312,20 @@ postfix_expr: - } - - field_idx = field - type->e.record.fields; -- if (!add_record_load(ctx, $1, node, field_idx, @2)) -+ if (!add_record_access(ctx, $1, node, field_idx, &@2)) - YYABORT; - $$ = $1; - } -- else if (node->data_type->type <= HLSL_CLASS_LAST_NUMERIC) -+ else if (node->data_type->class <= HLSL_CLASS_LAST_NUMERIC) - { -- struct hlsl_ir_swizzle *swizzle; -+ struct hlsl_ir_node *swizzle; - - if (!(swizzle = get_swizzle(ctx, node, $3, &@3))) - { - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle \"%s\".", $3); - YYABORT; - } -- list_add_tail($1, &swizzle->node.entry); -+ hlsl_block_add_instr($1, swizzle); - $$ = $1; - } - else -@@ -5389,17 +6336,17 @@ postfix_expr: - } - | postfix_expr '[' expr ']' - { -- struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); -+ struct hlsl_ir_node *array = node_from_block($1), *index = node_from_block($3); - -- list_move_tail($1, $3); -- vkd3d_free($3); -+ hlsl_block_add_block($3, $1); -+ destroy_block($1); - -- if (!add_array_load(ctx, $1, array, index, &@2)) -+ if (!add_array_access(ctx, $3, array, index, &@2)) - { -- destroy_instr_list($1); -+ destroy_block($3); - YYABORT; - } -- $$ = $1; -+ $$ = $3; - } - - /* var_modifiers is necessary to avoid shift/reduce conflicts. 
*/ -@@ -5412,7 +6359,7 @@ postfix_expr: - free_parse_initializer(&$4); - YYABORT; - } -- if ($2->type > HLSL_CLASS_LAST_NUMERIC) -+ if ($2->class > HLSL_CLASS_LAST_NUMERIC) - { - struct vkd3d_string_buffer *string; - -@@ -5432,7 +6379,7 @@ postfix_expr: - YYABORT; - } - -- if (!($$ = add_constructor(ctx, $2, &$4, @2))) -+ if (!($$ = add_constructor(ctx, $2, &$4, &@2))) - { - free_parse_initializer(&$4); - YYABORT; -@@ -5440,14 +6387,14 @@ postfix_expr: - } - | postfix_expr '.' any_identifier '(' func_arguments ')' - { -- struct hlsl_ir_node *object = node_from_list($1); -+ struct hlsl_ir_node *object = node_from_block($1); - -- list_move_tail($1, $5.instrs); -+ hlsl_block_add_block($1, $5.instrs); - vkd3d_free($5.instrs); - - if (!add_method_call(ctx, $1, object, $3, &$5, &@3)) - { -- hlsl_free_instr_list($1); -+ destroy_block($1); - vkd3d_free($5.args); - YYABORT; - } -@@ -5459,18 +6406,18 @@ unary_expr: - postfix_expr - | OP_INC unary_expr - { -- if (!add_increment(ctx, $2, false, false, @1)) -+ if (!add_increment(ctx, $2, false, false, &@1)) - { -- destroy_instr_list($2); -+ destroy_block($2); - YYABORT; - } - $$ = $2; - } - | OP_DEC unary_expr - { -- if (!add_increment(ctx, $2, true, false, @1)) -+ if (!add_increment(ctx, $2, true, false, &@1)) - { -- destroy_instr_list($2); -+ destroy_block($2); - YYABORT; - } - $$ = $2; -@@ -5481,23 +6428,23 @@ unary_expr: - } - | '-' unary_expr - { -- add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_list($2), &@1); -+ add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_block($2), &@1); - $$ = $2; - } - | '~' unary_expr - { -- add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_list($2), &@1); -+ add_unary_bitwise_expr(ctx, $2, HLSL_OP1_BIT_NOT, node_from_block($2), &@1); - $$ = $2; - } - | '!' 
unary_expr - { -- add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_list($2), &@1); -+ add_unary_logical_expr(ctx, $2, HLSL_OP1_LOGIC_NOT, node_from_block($2), &@1); - $$ = $2; - } - /* var_modifiers is necessary to avoid shift/reduce conflicts. */ - | '(' var_modifiers type arrays ')' unary_expr - { -- struct hlsl_type *src_type = node_from_list($6)->data_type; -+ struct hlsl_type *src_type = node_from_block($6)->data_type; - struct hlsl_type *dst_type; - unsigned int i; - -@@ -5533,9 +6480,9 @@ unary_expr: - YYABORT; - } - -- if (!add_cast(ctx, $6, node_from_list($6), dst_type, &@3)) -+ if (!add_cast(ctx, $6, node_from_block($6), dst_type, &@3)) - { -- hlsl_free_instr_list($6); -+ destroy_block($6); - YYABORT; - } - $$ = $6; -@@ -5545,114 +6492,138 @@ mul_expr: - unary_expr - | mul_expr '*' unary_expr - { -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, @2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); - } - | mul_expr '/' unary_expr - { -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, @2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); - } - | mul_expr '%' unary_expr - { -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, @2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); - } - - add_expr: - mul_expr - | add_expr '+' mul_expr - { -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); - } - | add_expr '-' mul_expr - { - struct hlsl_ir_node *neg; - -- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), @2))) -+ if (!(neg = add_unary_arithmetic_expr(ctx, $3, HLSL_OP1_NEG, node_from_block($3), &@2))) - YYABORT; -- list_add_tail($3, &neg->entry); -- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); - } - - shift_expr: - add_expr - | shift_expr OP_LEFTSHIFT add_expr - { -- 
$$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); - } - | shift_expr OP_RIGHTSHIFT add_expr - { -- $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); - } - - relational_expr: - shift_expr - | relational_expr '<' shift_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, @2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); - } - | relational_expr '>' shift_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, @2); -+ $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); - } - | relational_expr OP_LE shift_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, @2); -+ $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); - } - | relational_expr OP_GE shift_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, @2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); - } - - equality_expr: - relational_expr - | equality_expr OP_EQ relational_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, @2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); - } - | equality_expr OP_NE relational_expr - { -- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, @2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); - } - - bitand_expr: - equality_expr - | bitand_expr '&' equality_expr - { -- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); - } - - bitxor_expr: - bitand_expr - | bitxor_expr '^' bitand_expr - { -- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); - } - - bitor_expr: - bitxor_expr - | bitor_expr '|' bitxor_expr 
- { -- $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); - } - - logicand_expr: - bitor_expr - | logicand_expr OP_AND bitor_expr - { -- $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); - } - - logicor_expr: - logicand_expr - | logicor_expr OP_OR logicand_expr - { -- $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); -+ $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); - } - - conditional_expr: - logicor_expr - | logicor_expr '?' expr ':' assignment_expr - { -- hlsl_fixme(ctx, &@$, "Ternary operator."); -+ struct hlsl_ir_node *cond = node_from_block($1); -+ struct hlsl_ir_node *first = node_from_block($3); -+ struct hlsl_ir_node *second = node_from_block($5); -+ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = { 0 }; -+ struct hlsl_type *common_type; -+ -+ hlsl_block_add_block($1, $3); -+ hlsl_block_add_block($1, $5); -+ destroy_block($3); -+ destroy_block($5); -+ -+ if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) -+ YYABORT; -+ -+ if (!(first = add_implicit_conversion(ctx, $1, first, common_type, &@3))) -+ YYABORT; -+ -+ if (!(second = add_implicit_conversion(ctx, $1, second, common_type, &@5))) -+ YYABORT; -+ -+ args[0] = cond; -+ args[1] = first; -+ args[2] = second; -+ if (!add_expr(ctx, $1, HLSL_OP3_TERNARY, args, common_type, &@1)) -+ YYABORT; -+ $$ = $1; - } - - assignment_expr: -@@ -5660,15 +6631,15 @@ assignment_expr: - conditional_expr - | unary_expr assign_op assignment_expr - { -- struct hlsl_ir_node *lhs = node_from_list($1), *rhs = node_from_list($3); -+ struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3); - - if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) - { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression."); - YYABORT; - } -- 
list_move_tail($3, $1); -- vkd3d_free($1); -+ hlsl_block_add_block($3, $1); -+ destroy_block($1); - if (!add_assignment(ctx, $3, lhs, $2, rhs)) - YYABORT; - $$ = $3; -@@ -5725,6 +6696,6 @@ expr: - | expr ',' assignment_expr - { - $$ = $1; -- list_move_tail($$, $3); -- vkd3d_free($3); -+ hlsl_block_add_block($$, $3); -+ destroy_block($3); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index ab59875738c..be024842164 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -27,11 +27,11 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str - enum hlsl_regset regset, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *idx_offset = NULL; -- struct hlsl_ir_constant *c; -+ struct hlsl_ir_node *c; - -- list_init(&block->instrs); -+ hlsl_block_init(block); - -- switch (type->type) -+ switch (type->class) - { - case HLSL_CLASS_VECTOR: - idx_offset = idx; -@@ -41,11 +41,11 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str - { - if (!(c = hlsl_new_uint_constant(ctx, 4, loc))) - return NULL; -- list_add_tail(&block->instrs, &c->node.entry); -+ hlsl_block_add_instr(block, c); - -- if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, &c->node, idx))) -+ if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, c, idx))) - return NULL; -- list_add_tail(&block->instrs, &idx_offset->entry); -+ hlsl_block_add_instr(block, idx_offset); - - break; - } -@@ -56,25 +56,25 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str - - if (!(c = hlsl_new_uint_constant(ctx, size, loc))) - return NULL; -- list_add_tail(&block->instrs, &c->node.entry); -+ hlsl_block_add_instr(block, c); - -- if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, &c->node, idx))) -+ if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, c, idx))) - return NULL; -- 
list_add_tail(&block->instrs, &idx_offset->entry); -+ hlsl_block_add_instr(block, idx_offset); - - break; - } - - case HLSL_CLASS_STRUCT: - { -- unsigned int field_idx = hlsl_ir_constant(idx)->value[0].u; -+ unsigned int field_idx = hlsl_ir_constant(idx)->value.u[0].u; - struct hlsl_struct_field *field = &type->e.record.fields[field_idx]; - - if (!(c = hlsl_new_uint_constant(ctx, field->reg_offset[regset], loc))) - return NULL; -- list_add_tail(&block->instrs, &c->node.entry); -+ hlsl_block_add_instr(block, c); - -- idx_offset = &c->node; -+ idx_offset = c; - - break; - } -@@ -87,7 +87,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str - { - if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, offset, idx_offset))) - return NULL; -- list_add_tail(&block->instrs, &idx_offset->entry); -+ hlsl_block_add_instr(block, idx_offset); - } - - return idx_offset; -@@ -97,11 +97,12 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str - static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *deref, const struct vkd3d_shader_location *loc) - { -+ enum hlsl_regset regset = hlsl_type_get_regset(deref->data_type); - struct hlsl_ir_node *offset = NULL; - struct hlsl_type *type; - unsigned int i; - -- list_init(&block->instrs); -+ hlsl_block_init(block); - - assert(deref->var); - type = deref->var->data_type; -@@ -111,10 +112,10 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st - struct hlsl_block idx_block; - - if (!(offset = new_offset_from_path_index(ctx, &idx_block, type, offset, deref->path[i].node, -- deref->offset_regset, loc))) -+ regset, loc))) - return NULL; - -- list_move_tail(&block->instrs, &idx_block.instrs); -+ hlsl_block_add_block(block, &idx_block); - - type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); - } -@@ -123,15 +124,14 @@ static struct hlsl_ir_node 
*new_offset_instr_from_deref(struct hlsl_ctx *ctx, st - } - - /* TODO: remove when no longer needed, only used for transform_deref_paths_into_offsets() */ --static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, -+static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, - struct hlsl_ir_node *instr) - { -- const struct hlsl_type *type; -+ struct hlsl_type *type; - struct hlsl_ir_node *offset; - struct hlsl_block block; - -- if (!deref->var) -- return; -+ assert(deref->var); - - /* register offsets shouldn't be used before this point is reached. */ - assert(!deref->offset.node); -@@ -140,65 +140,39 @@ static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der - - /* Instructions that directly refer to structs or arrays (instead of single-register components) - * are removed later by dce. So it is not a problem to just cleanup their derefs. */ -- if (type->type == HLSL_CLASS_STRUCT || type->type == HLSL_CLASS_ARRAY) -+ if (type->class == HLSL_CLASS_STRUCT || type->class == HLSL_CLASS_ARRAY) - { - hlsl_cleanup_deref(deref); -- return; -+ return true; - } - -- deref->offset_regset = hlsl_type_get_regset(type); -+ deref->data_type = type; - - if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) -- return; -+ return false; - list_move_before(&instr->entry, &block.instrs); - - hlsl_cleanup_deref(deref); - hlsl_src_from_node(&deref->offset, offset); --} -- --/* TODO: remove when no longer needed. 
*/ --static bool transform_deref_paths_into_offsets(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) --{ -- switch(instr->type) -- { -- case HLSL_IR_LOAD: -- replace_deref_path_with_offset(ctx, &hlsl_ir_load(instr)->src, instr); -- return true; -- -- case HLSL_IR_STORE: -- replace_deref_path_with_offset(ctx, &hlsl_ir_store(instr)->lhs, instr); -- return true; -- -- case HLSL_IR_RESOURCE_LOAD: -- replace_deref_path_with_offset(ctx, &hlsl_ir_resource_load(instr)->resource, instr); -- replace_deref_path_with_offset(ctx, &hlsl_ir_resource_load(instr)->sampler, instr); -- return true; -- -- case HLSL_IR_RESOURCE_STORE: -- replace_deref_path_with_offset(ctx, &hlsl_ir_resource_store(instr)->resource, instr); -- return true; - -- default: -- return false; -- } -- return false; -+ return true; - } - - /* Split uniforms into two variables representing the constant and temp - * registers, and copy the former to the latter, so that writes to uniforms - * work. */ --static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *temp) -+static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *temp) - { -- struct vkd3d_string_buffer *name; - struct hlsl_ir_var *uniform; -- struct hlsl_ir_store *store; -+ struct hlsl_ir_node *store; - struct hlsl_ir_load *load; -+ char *new_name; - - /* Use the synthetic name for the temp, rather than the uniform, so that we - * can write the uniform name into the shader reflection data. 
*/ - - if (!(uniform = hlsl_new_var(ctx, temp->name, temp->data_type, -- temp->loc, NULL, temp->storage_modifiers, &temp->reg_reservation))) -+ &temp->loc, NULL, temp->storage_modifiers, &temp->reg_reservation))) - return; - list_add_before(&temp->scope_entry, &uniform->scope_entry); - list_add_tail(&ctx->extern_vars, &uniform->extern_entry); -@@ -206,45 +180,111 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru - uniform->is_param = temp->is_param; - uniform->buffer = temp->buffer; - -- if (!(name = hlsl_get_string_buffer(ctx))) -+ if (!(new_name = hlsl_sprintf_alloc(ctx, "", temp->name))) - return; -- vkd3d_string_buffer_printf(name, "", temp->name); -- temp->name = hlsl_strdup(ctx, name->buffer); -- hlsl_release_string_buffer(ctx, name); -+ temp->name = new_name; - -- if (!(load = hlsl_new_var_load(ctx, uniform, temp->loc))) -+ if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) - return; -- list_add_head(instrs, &load->node.entry); -+ list_add_head(&block->instrs, &load->node.entry); - - if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) - return; -- list_add_after(&load->node.entry, &store->node.entry); -+ list_add_after(&load->node.entry, &store->entry); -+} -+ -+static void validate_field_semantic(struct hlsl_ctx *ctx, struct hlsl_struct_field *field) -+{ -+ if (!field->semantic.name && hlsl_get_multiarray_element_type(field->type)->class <= HLSL_CLASS_LAST_NUMERIC -+ && !field->semantic.reported_missing) -+ { -+ hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, -+ "Field '%s' is missing a semantic.", field->name); -+ field->semantic.reported_missing = true; -+ } -+} -+ -+static enum hlsl_base_type base_type_get_semantic_equivalent(enum hlsl_base_type base) -+{ -+ if (base == HLSL_TYPE_BOOL) -+ return HLSL_TYPE_UINT; -+ if (base == HLSL_TYPE_INT) -+ return HLSL_TYPE_UINT; -+ if (base == HLSL_TYPE_HALF) -+ return HLSL_TYPE_FLOAT; -+ return base; -+} -+ -+static bool 
types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hlsl_type *type1, -+ const struct hlsl_type *type2) -+{ -+ if (ctx->profile->major_version < 4) -+ return true; -+ -+ if (type1->dimx != type2->dimx) -+ return false; -+ -+ return base_type_get_semantic_equivalent(type1->base_type) -+ == base_type_get_semantic_equivalent(type2->base_type); - } - - static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, -- struct hlsl_type *type, unsigned int modifiers, const struct hlsl_semantic *semantic, bool output) -+ struct hlsl_type *type, unsigned int modifiers, struct hlsl_semantic *semantic, -+ uint32_t index, bool output, const struct vkd3d_shader_location *loc) - { - struct hlsl_semantic new_semantic; -- struct vkd3d_string_buffer *name; - struct hlsl_ir_var *ext_var; -+ char *new_name; - -- if (!(name = hlsl_get_string_buffer(ctx))) -+ if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? "output" : "input", semantic->name, index))) - return NULL; -- vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? 
"output" : "input", semantic->name, semantic->index); -+ -+ LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (!ascii_strcasecmp(ext_var->name, new_name)) -+ { -+ if (output) -+ { -+ if (index >= semantic->reported_duplicated_output_next_index) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, -+ "Output semantic \"%s%u\" is used multiple times.", semantic->name, index); -+ hlsl_note(ctx, &ext_var->loc, HLSL_LEVEL_ERROR, -+ "First use of \"%s%u\" is here.", semantic->name, index); -+ semantic->reported_duplicated_output_next_index = index + 1; -+ } -+ } -+ else -+ { -+ if (index >= semantic->reported_duplicated_input_incompatible_next_index -+ && !types_are_semantic_equivalent(ctx, ext_var->data_type, type)) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, -+ "Input semantic \"%s%u\" is used multiple times with incompatible types.", -+ semantic->name, index); -+ hlsl_note(ctx, &ext_var->loc, HLSL_LEVEL_ERROR, -+ "First declaration of \"%s%u\" is here.", semantic->name, index); -+ semantic->reported_duplicated_input_incompatible_next_index = index + 1; -+ } -+ } -+ -+ vkd3d_free(new_name); -+ return ext_var; -+ } -+ } -+ - if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) - { -- hlsl_release_string_buffer(ctx, name); -+ vkd3d_free(new_name); - return NULL; - } -- new_semantic.index = semantic->index; -- if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), -- type, var->loc, &new_semantic, modifiers, NULL))) -+ new_semantic.index = index; -+ if (!(ext_var = hlsl_new_var(ctx, new_name, type, loc, &new_semantic, modifiers, NULL))) - { -- hlsl_release_string_buffer(ctx, name); -+ vkd3d_free(new_name); - hlsl_cleanup_semantic(&new_semantic); - return NULL; - } -- hlsl_release_string_buffer(ctx, name); - if (output) - ext_var->is_output_semantic = 1; - else -@@ -256,132 +296,175 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - 
return ext_var; - } - --static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, -- unsigned int modifiers, const struct hlsl_semantic *semantic) -+static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, -+ unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { -- struct hlsl_type *type = lhs->node.data_type, *vector_type; -+ struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; -+ struct vkd3d_shader_location *loc = &lhs->node.loc; - struct hlsl_ir_var *var = lhs->src.var; -+ struct hlsl_ir_node *c; - unsigned int i; - -- vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); -+ if (type->class > HLSL_CLASS_LAST_NUMERIC) -+ { -+ struct vkd3d_string_buffer *string; -+ if (!(string = hlsl_type_to_string(ctx, type))) -+ return; -+ hlsl_fixme(ctx, &var->loc, "Input semantics for type %s.", string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ } -+ if (!semantic->name) -+ return; -+ -+ vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); -+ vector_type_src = vector_type_dst; -+ if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) -+ vector_type_src = hlsl_get_vector_type(ctx, type->base_type, 4); - - for (i = 0; i < hlsl_type_major_size(type); ++i) - { -- struct hlsl_semantic semantic_copy = *semantic; -- struct hlsl_ir_store *store; -- struct hlsl_ir_constant *c; -+ struct hlsl_ir_node *store, *cast; - struct hlsl_ir_var *input; - struct hlsl_ir_load *load; - -- semantic_copy.index = semantic->index + i; -- -- if (!(input = add_semantic_var(ctx, var, vector_type, modifiers, &semantic_copy, false))) -+ if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic, -+ semantic_index + i, false, loc))) - return; - -- if (!(load = hlsl_new_var_load(ctx, input, var->loc))) -+ if (!(load = 
hlsl_new_var_load(ctx, input, &var->loc))) - return; - list_add_after(&lhs->node.entry, &load->node.entry); - -- if (type->type == HLSL_CLASS_MATRIX) -+ if (!(cast = hlsl_new_cast(ctx, &load->node, vector_type_dst, &var->loc))) -+ return; -+ list_add_after(&load->node.entry, &cast->entry); -+ -+ if (type->class == HLSL_CLASS_MATRIX) - { - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; -- list_add_after(&load->node.entry, &c->node.entry); -+ list_add_after(&cast->entry, &c->entry); - -- if (!(store = hlsl_new_store_index(ctx, &lhs->src, &c->node, &load->node, 0, &var->loc))) -+ if (!(store = hlsl_new_store_index(ctx, &lhs->src, c, cast, 0, &var->loc))) - return; -- list_add_after(&c->node.entry, &store->node.entry); -+ list_add_after(&c->entry, &store->entry); - } - else - { - assert(i == 0); - -- if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, &load->node, 0, &var->loc))) -+ if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, cast, 0, &var->loc))) - return; -- list_add_after(&load->node.entry, &store->node.entry); -+ list_add_after(&cast->entry, &store->entry); - } - } - } - --static void prepend_input_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs) -+static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, -+ unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { -+ struct vkd3d_shader_location *loc = &lhs->node.loc; - struct hlsl_type *type = lhs->node.data_type; - struct hlsl_ir_var *var = lhs->src.var; -- size_t i; -+ struct hlsl_ir_node *c; -+ unsigned int i; - -- for (i = 0; i < type->e.record.field_count; ++i) -+ if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT) - { -- const struct hlsl_struct_field *field = &type->e.record.fields[i]; -- struct hlsl_ir_load *field_load; -- struct hlsl_ir_constant *c; -+ struct hlsl_ir_load *element_load; -+ struct hlsl_struct_field *field; -+ uint32_t 
elem_semantic_index; - -- if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) -- return; -- list_add_after(&lhs->node.entry, &c->node.entry); -+ for (i = 0; i < hlsl_type_element_count(type); ++i) -+ { -+ if (type->class == HLSL_CLASS_ARRAY) -+ { -+ elem_semantic_index = semantic_index -+ + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; -+ } -+ else -+ { -+ field = &type->e.record.fields[i]; -+ if (hlsl_type_is_resource(field->type)) -+ continue; -+ validate_field_semantic(ctx, field); -+ semantic = &field->semantic; -+ elem_semantic_index = semantic->index; -+ loc = &field->loc; -+ } - -- /* This redundant load is expected to be deleted later by DCE. */ -- if (!(field_load = hlsl_new_load_index(ctx, &lhs->src, &c->node, &var->loc))) -- return; -- list_add_after(&c->node.entry, &field_load->node.entry); -+ if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) -+ return; -+ list_add_after(&lhs->node.entry, &c->entry); - -- if (field->type->type == HLSL_CLASS_STRUCT) -- prepend_input_struct_copy(ctx, instrs, field_load); -- else if (field->semantic.name) -- prepend_input_copy(ctx, instrs, field_load, field->storage_modifiers, &field->semantic); -- else -- hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, -- "Field '%s' is missing a semantic.", field->name); -+ /* This redundant load is expected to be deleted later by DCE. */ -+ if (!(element_load = hlsl_new_load_index(ctx, &lhs->src, c, loc))) -+ return; -+ list_add_after(&c->entry, &element_load->node.entry); -+ -+ prepend_input_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); -+ } -+ } -+ else -+ { -+ prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); - } - } - - /* Split inputs into two variables representing the semantic and temp registers, - * and copy the former to the latter, so that writes to input variables work. 
*/ --static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) -+static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) - { - struct hlsl_ir_load *load; - - /* This redundant load is expected to be deleted later by DCE. */ -- if (!(load = hlsl_new_var_load(ctx, var, var->loc))) -+ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) - return; -- list_add_head(instrs, &load->node.entry); -+ list_add_head(&block->instrs, &load->node.entry); - -- if (var->data_type->type == HLSL_CLASS_STRUCT) -- prepend_input_struct_copy(ctx, instrs, load); -- else if (var->semantic.name) -- prepend_input_copy(ctx, instrs, load, var->storage_modifiers, &var->semantic); -+ prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); - } - --static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, -- unsigned int modifiers, const struct hlsl_semantic *semantic) -+static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, -+ unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct hlsl_type *type = rhs->node.data_type, *vector_type; -+ struct vkd3d_shader_location *loc = &rhs->node.loc; - struct hlsl_ir_var *var = rhs->src.var; -+ struct hlsl_ir_node *c; - unsigned int i; - -+ if (type->class > HLSL_CLASS_LAST_NUMERIC) -+ { -+ struct vkd3d_string_buffer *string; -+ if (!(string = hlsl_type_to_string(ctx, type))) -+ return; -+ hlsl_fixme(ctx, &var->loc, "Output semantics for type %s.", string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ } -+ if (!semantic->name) -+ return; -+ - vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); - - for (i = 0; i < hlsl_type_major_size(type); ++i) - { -- struct hlsl_semantic semantic_copy = *semantic; -- struct hlsl_ir_store *store; -- struct 
hlsl_ir_constant *c; -+ struct hlsl_ir_node *store; - struct hlsl_ir_var *output; - struct hlsl_ir_load *load; - -- semantic_copy.index = semantic->index + i; -- -- if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, &semantic_copy, true))) -+ if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc))) - return; - -- if (type->type == HLSL_CLASS_MATRIX) -+ if (type->class == HLSL_CLASS_MATRIX) - { - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; -- list_add_tail(instrs, &c->node.entry); -+ hlsl_block_add_instr(block, c); - -- if (!(load = hlsl_new_load_index(ctx, &rhs->src, &c->node, &var->loc))) -+ if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) - return; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - } - else - { -@@ -389,65 +472,81 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct - - if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) - return; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - } - - if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) - return; -- list_add_tail(instrs, &store->node.entry); -+ hlsl_block_add_instr(block, store); - } - } - --static void append_output_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs) -+static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, -+ unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { -+ struct vkd3d_shader_location *loc = &rhs->node.loc; - struct hlsl_type *type = rhs->node.data_type; - struct hlsl_ir_var *var = rhs->src.var; -- size_t i; -+ struct hlsl_ir_node *c; -+ unsigned int i; - -- for (i = 0; i < type->e.record.field_count; ++i) -+ if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT) - { -- const struct 
hlsl_struct_field *field = &type->e.record.fields[i]; -- struct hlsl_ir_load *field_load; -- struct hlsl_ir_constant *c; -+ struct hlsl_ir_load *element_load; -+ struct hlsl_struct_field *field; -+ uint32_t elem_semantic_index; - -- if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) -- return; -- list_add_tail(instrs, &c->node.entry); -+ for (i = 0; i < hlsl_type_element_count(type); ++i) -+ { -+ if (type->class == HLSL_CLASS_ARRAY) -+ { -+ elem_semantic_index = semantic_index -+ + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; -+ } -+ else -+ { -+ field = &type->e.record.fields[i]; -+ if (hlsl_type_is_resource(field->type)) -+ continue; -+ validate_field_semantic(ctx, field); -+ semantic = &field->semantic; -+ elem_semantic_index = semantic->index; -+ loc = &field->loc; -+ } - -- /* This redundant load is expected to be deleted later by DCE. */ -- if (!(field_load = hlsl_new_load_index(ctx, &rhs->src, &c->node, &var->loc))) -- return; -- list_add_tail(instrs, &field_load->node.entry); -+ if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) -+ return; -+ hlsl_block_add_instr(block, c); - -- if (field->type->type == HLSL_CLASS_STRUCT) -- append_output_struct_copy(ctx, instrs, field_load); -- else if (field->semantic.name) -- append_output_copy(ctx, instrs, field_load, field->storage_modifiers, &field->semantic); -- else -- hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, -- "Field '%s' is missing a semantic.", field->name); -+ if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) -+ return; -+ hlsl_block_add_instr(block, &element_load->node); -+ -+ append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); -+ } -+ } -+ else -+ { -+ append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); - } - } - - /* Split outputs into two variables representing the temp and semantic - * registers, and copy the former to the latter, so that reads 
from output - * variables work. */ --static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) -+static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) - { - struct hlsl_ir_load *load; - - /* This redundant load is expected to be deleted later by DCE. */ -- if (!(load = hlsl_new_var_load(ctx, var, var->loc))) -+ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) - return; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(block, &load->node); - -- if (var->data_type->type == HLSL_CLASS_STRUCT) -- append_output_struct_copy(ctx, instrs, load); -- else if (var->semantic.name) -- append_output_copy(ctx, instrs, load, var->storage_modifiers, &var->semantic); -+ append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); - } - --static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), -+bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), - struct hlsl_block *block, void *context) - { - struct hlsl_ir_node *instr, *next; -@@ -459,11 +558,11 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx - { - struct hlsl_ir_if *iff = hlsl_ir_if(instr); - -- progress |= transform_ir(ctx, func, &iff->then_instrs, context); -- progress |= transform_ir(ctx, func, &iff->else_instrs, context); -+ progress |= hlsl_transform_ir(ctx, func, &iff->then_block, context); -+ progress |= hlsl_transform_ir(ctx, func, &iff->else_block, context); - } - else if (instr->type == HLSL_IR_LOOP) -- progress |= transform_ir(ctx, func, &hlsl_ir_loop(instr)->body, context); -+ progress |= hlsl_transform_ir(ctx, func, &hlsl_ir_loop(instr)->body, context); - - progress |= func(ctx, instr, context); - } -@@ -471,6 +570,75 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx 
- return progress; - } - -+typedef bool (*PFN_lower_func)(struct hlsl_ctx *, struct hlsl_ir_node *, struct hlsl_block *); -+ -+static bool call_lower_func(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ PFN_lower_func func = context; -+ struct hlsl_block block; -+ -+ hlsl_block_init(&block); -+ if (func(ctx, instr, &block)) -+ { -+ struct hlsl_ir_node *replacement = LIST_ENTRY(list_tail(&block.instrs), struct hlsl_ir_node, entry); -+ -+ list_move_before(&instr->entry, &block.instrs); -+ hlsl_replace_node(instr, replacement); -+ return true; -+ } -+ else -+ { -+ hlsl_block_cleanup(&block); -+ return false; -+ } -+} -+ -+/* Specific form of transform_ir() for passes which convert a single instruction -+ * to a block of one or more instructions. This helper takes care of setting up -+ * the block and calling hlsl_replace_node_with_block(). */ -+static bool lower_ir(struct hlsl_ctx *ctx, PFN_lower_func func, struct hlsl_block *block) -+{ -+ return hlsl_transform_ir(ctx, call_lower_func, block, func); -+} -+ -+static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ bool res; -+ bool (*func)(struct hlsl_ctx *ctx, struct hlsl_deref *, struct hlsl_ir_node *) = context; -+ -+ switch(instr->type) -+ { -+ case HLSL_IR_LOAD: -+ res = func(ctx, &hlsl_ir_load(instr)->src, instr); -+ return res; -+ -+ case HLSL_IR_STORE: -+ res = func(ctx, &hlsl_ir_store(instr)->lhs, instr); -+ return res; -+ -+ case HLSL_IR_RESOURCE_LOAD: -+ res = func(ctx, &hlsl_ir_resource_load(instr)->resource, instr); -+ if (hlsl_ir_resource_load(instr)->sampler.var) -+ res |= func(ctx, &hlsl_ir_resource_load(instr)->sampler, instr); -+ return res; -+ -+ case HLSL_IR_RESOURCE_STORE: -+ res = func(ctx, &hlsl_ir_resource_store(instr)->resource, instr); -+ return res; -+ -+ default: -+ return false; -+ } -+ return false; -+} -+ -+static bool transform_derefs(struct hlsl_ctx *ctx, -+ bool (*func)(struct hlsl_ctx *ctx, struct hlsl_deref *, 
struct hlsl_ir_node *), -+ struct hlsl_block *block) -+{ -+ return hlsl_transform_ir(ctx, transform_instr_derefs, block, func); -+} -+ - struct recursive_call_ctx - { - const struct hlsl_ir_function_decl **backtrace; -@@ -506,7 +674,7 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - return false; - call_ctx->backtrace[call_ctx->count++] = decl; - -- transform_ir(ctx, find_recursive_calls, &decl->body, call_ctx); -+ hlsl_transform_ir(ctx, find_recursive_calls, &decl->body, call_ctx); - - --call_ctx->count; - -@@ -516,21 +684,23 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - static void insert_early_return_break(struct hlsl_ctx *ctx, - struct hlsl_ir_function_decl *func, struct hlsl_ir_node *cf_instr) - { -- struct hlsl_ir_jump *jump; -+ struct hlsl_ir_node *iff, *jump; -+ struct hlsl_block then_block; - struct hlsl_ir_load *load; -- struct hlsl_ir_if *iff; - -- if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) -+ hlsl_block_init(&then_block); -+ -+ if (!(load = hlsl_new_var_load(ctx, func->early_return_var, &cf_instr->loc))) - return; - list_add_after(&cf_instr->entry, &load->node.entry); - -- if (!(iff = hlsl_new_if(ctx, &load->node, cf_instr->loc))) -+ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc))) - return; -- list_add_after(&load->node.entry, &iff->node.entry); -+ hlsl_block_add_instr(&then_block, jump); - -- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, cf_instr->loc))) -+ if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, &cf_instr->loc))) - return; -- list_add_tail(&iff->then_instrs.instrs, &jump->node.entry); -+ list_add_after(&load->node.entry, &iff->entry); - } - - /* Remove HLSL_IR_JUMP_RETURN calls by altering subsequent control flow. 
*/ -@@ -566,7 +736,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun - * the CF instruction, shove it into an if block, and then lower that if - * block. - * -- * (We could return a "did we make progress" boolean like transform_ir() -+ * (We could return a "did we make progress" boolean like hlsl_transform_ir() - * and run this pass multiple times, but we already know the only block - * that still needs to be addressed, so there's not much point.) - * -@@ -591,8 +761,8 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun - { - struct hlsl_ir_if *iff = hlsl_ir_if(instr); - -- has_early_return |= lower_return(ctx, func, &iff->then_instrs, in_loop); -- has_early_return |= lower_return(ctx, func, &iff->else_instrs, in_loop); -+ has_early_return |= lower_return(ctx, func, &iff->then_block, in_loop); -+ has_early_return |= lower_return(ctx, func, &iff->else_block, in_loop); - - if (has_early_return) - { -@@ -628,18 +798,17 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun - else if (instr->type == HLSL_IR_JUMP) - { - struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -- struct hlsl_ir_constant *constant; -- struct hlsl_ir_store *store; -+ struct hlsl_ir_node *constant, *store; - - if (jump->type == HLSL_IR_JUMP_RETURN) - { - if (!(constant = hlsl_new_bool_constant(ctx, true, &jump->node.loc))) - return false; -- list_add_before(&jump->node.entry, &constant->node.entry); -+ list_add_before(&jump->node.entry, &constant->entry); - -- if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, &constant->node))) -+ if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, constant))) - return false; -- list_add_after(&constant->node.entry, &store->node.entry); -+ list_add_after(&constant->entry, &store->entry); - - has_early_return = true; - if (in_loop) -@@ -675,9 +844,9 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun - else if 
(cf_instr) - { - struct list *tail = list_tail(&block->instrs); -+ struct hlsl_ir_node *not, *iff; -+ struct hlsl_block then_block; - struct hlsl_ir_load *load; -- struct hlsl_ir_node *not; -- struct hlsl_ir_if *iff; - - /* If we're in a loop, we should have used "break" instead. */ - assert(!in_loop); -@@ -685,21 +854,21 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun - if (tail == &cf_instr->entry) - return has_early_return; - -- if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) -- return false; -- list_add_tail(&block->instrs, &load->node.entry); -+ hlsl_block_init(&then_block); -+ list_move_slice_tail(&then_block.instrs, list_next(&block->instrs, &cf_instr->entry), tail); -+ lower_return(ctx, func, &then_block, in_loop); - -- if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, cf_instr->loc))) -+ if (!(load = hlsl_new_var_load(ctx, func->early_return_var, &cf_instr->loc))) - return false; -- list_add_tail(&block->instrs, &not->entry); -+ hlsl_block_add_instr(block, &load->node); - -- if (!(iff = hlsl_new_if(ctx, not, cf_instr->loc))) -+ if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, &cf_instr->loc))) - return false; -- list_add_tail(&block->instrs, &iff->node.entry); -- -- list_move_slice_tail(&iff->then_instrs.instrs, list_next(&block->instrs, &cf_instr->entry), tail); -+ hlsl_block_add_instr(block, not); - -- lower_return(ctx, func, &iff->then_instrs, in_loop); -+ if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &cf_instr->loc))) -+ return false; -+ list_add_tail(&block->instrs, &iff->entry); - } - - return has_early_return; -@@ -721,7 +890,6 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * - hlsl_error(ctx, &call->node.loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, - "Function \"%s\" is not defined.", decl->func->name); - -- list_init(&block.instrs); - if (!hlsl_clone_block(ctx, &block, &decl->body)) - return false; - 
list_move_before(&call->node.entry, &block.instrs); -@@ -731,6 +899,191 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * - return true; - } - -+static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct hlsl_ir_node *index, -+ const struct vkd3d_shader_location *loc) -+{ -+ unsigned int dim_count = index->data_type->dimx; -+ struct hlsl_ir_node *store, *zero; -+ struct hlsl_ir_load *coords_load; -+ struct hlsl_deref coords_deref; -+ struct hlsl_ir_var *coords; -+ -+ assert(dim_count < 4); -+ -+ if (!(coords = hlsl_new_synthetic_var(ctx, "coords", -+ hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) -+ return NULL; -+ -+ hlsl_init_simple_deref_from_var(&coords_deref, coords); -+ if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, index, (1u << dim_count) - 1, loc))) -+ return NULL; -+ list_add_after(&index->entry, &store->entry); -+ -+ if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) -+ return NULL; -+ list_add_after(&store->entry, &zero->entry); -+ -+ if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, zero, 1u << dim_count, loc))) -+ return NULL; -+ list_add_after(&zero->entry, &store->entry); -+ -+ if (!(coords_load = hlsl_new_var_load(ctx, coords, loc))) -+ return NULL; -+ list_add_after(&store->entry, &coords_load->node.entry); -+ -+ return &coords_load->node; -+} -+ -+/* hlsl_ir_swizzle nodes that directly point to a matrix value are only a parse-time construct that -+ * represents matrix swizzles (e.g. mat._m01_m23) before we know if they will be used in the lhs of -+ * an assignment or as a value made from different components of the matrix. The former cases should -+ * have already been split into several separate assignments, but the latter are lowered by this -+ * pass. 
*/ -+static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_swizzle *swizzle; -+ struct hlsl_ir_load *var_load; -+ struct hlsl_deref var_deref; -+ struct hlsl_type *matrix_type; -+ struct hlsl_ir_var *var; -+ unsigned int x, y, k, i; -+ -+ if (instr->type != HLSL_IR_SWIZZLE) -+ return false; -+ swizzle = hlsl_ir_swizzle(instr); -+ matrix_type = swizzle->val.node->data_type; -+ if (matrix_type->class != HLSL_CLASS_MATRIX) -+ return false; -+ -+ if (!(var = hlsl_new_synthetic_var(ctx, "matrix-swizzle", instr->data_type, &instr->loc))) -+ return false; -+ hlsl_init_simple_deref_from_var(&var_deref, var); -+ -+ for (i = 0; i < instr->data_type->dimx; ++i) -+ { -+ struct hlsl_block store_block; -+ struct hlsl_ir_node *load; -+ -+ y = (swizzle->swizzle >> (8 * i + 4)) & 0xf; -+ x = (swizzle->swizzle >> 8 * i) & 0xf; -+ k = y * matrix_type->dimx + x; -+ -+ if (!(load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc))) -+ return false; -+ -+ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, load)) -+ return false; -+ hlsl_block_add_block(block, &store_block); -+ } -+ -+ if (!(var_load = hlsl_new_var_load(ctx, var, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, &var_load->node); -+ -+ return true; -+} -+ -+/* hlsl_ir_index nodes are a parse-time construct used to represent array indexing and struct -+ * record access before knowing if they will be used in the lhs of an assignment --in which case -+ * they are lowered into a deref-- or as the load of an element within a larger value. -+ * For the latter case, this pass takes care of lowering hlsl_ir_indexes into individual -+ * hlsl_ir_loads, or individual hlsl_ir_resource_loads, in case the indexing is a -+ * resource access. 
*/ -+static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *val, *store; -+ struct hlsl_deref var_deref; -+ struct hlsl_ir_index *index; -+ struct hlsl_ir_load *load; -+ struct hlsl_ir_var *var; -+ -+ if (instr->type != HLSL_IR_INDEX) -+ return false; -+ index = hlsl_ir_index(instr); -+ val = index->val.node; -+ -+ if (hlsl_index_is_resource_access(index)) -+ { -+ unsigned int dim_count = hlsl_sampler_dim_count(val->data_type->sampler_dim); -+ struct hlsl_ir_node *coords = index->idx.node; -+ struct hlsl_resource_load_params params = {0}; -+ struct hlsl_ir_node *load; -+ -+ assert(coords->data_type->class == HLSL_CLASS_VECTOR); -+ assert(coords->data_type->base_type == HLSL_TYPE_UINT); -+ assert(coords->data_type->dimx == dim_count); -+ -+ if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) -+ return false; -+ -+ params.type = HLSL_RESOURCE_LOAD; -+ params.resource = val; -+ params.coords = coords; -+ params.format = val->data_type->e.resource_format; -+ -+ if (!(load = hlsl_new_resource_load(ctx, &params, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &load->entry); -+ hlsl_replace_node(instr, load); -+ return true; -+ } -+ -+ if (!(var = hlsl_new_synthetic_var(ctx, "index-val", val->data_type, &instr->loc))) -+ return false; -+ hlsl_init_simple_deref_from_var(&var_deref, var); -+ -+ if (!(store = hlsl_new_simple_store(ctx, var, val))) -+ return false; -+ list_add_before(&instr->entry, &store->entry); -+ -+ if (hlsl_index_is_noncontiguous(index)) -+ { -+ struct hlsl_ir_node *mat = index->val.node; -+ struct hlsl_deref row_deref; -+ unsigned int i; -+ -+ assert(!hlsl_type_is_row_major(mat->data_type)); -+ -+ if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc))) -+ return false; -+ hlsl_init_simple_deref_from_var(&row_deref, var); -+ -+ for (i = 0; i < mat->data_type->dimx; ++i) -+ { -+ struct hlsl_ir_node *c; -+ -+ if (!(c = 
hlsl_new_uint_constant(ctx, i, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &c->entry); -+ -+ if (!(load = hlsl_new_load_index(ctx, &var_deref, c, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &load->node.entry); -+ -+ if (!(load = hlsl_new_load_index(ctx, &load->src, index->idx.node, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &load->node.entry); -+ -+ if (!(store = hlsl_new_store_index(ctx, &row_deref, c, &load->node, 0, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &store->entry); -+ } -+ -+ if (!(load = hlsl_new_var_load(ctx, var, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &load->node.entry); -+ hlsl_replace_node(instr, &load->node); -+ } -+ else -+ { -+ if (!(load = hlsl_new_load_index(ctx, &var_deref, index->idx.node, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &load->node.entry); -+ hlsl_replace_node(instr, &load->node); -+ } -+ return true; -+} -+ - /* Lower casts from vec1 to vecN to swizzles. */ - static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -@@ -746,26 +1099,24 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v - src_type = cast->operands[0].node->data_type; - dst_type = cast->node.data_type; - -- if (src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR && src_type->dimx == 1) -+ if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->dimx == 1) - { -- struct hlsl_ir_node *replacement; -- struct hlsl_ir_swizzle *swizzle; -- struct hlsl_ir_expr *new_cast; -+ struct hlsl_ir_node *replacement, *new_cast, *swizzle; - - dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->base_type); - /* We need to preserve the cast since it might be doing more than just - * turning the scalar into a vector. 
*/ - if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_scalar_type, &cast->node.loc))) - return false; -- list_add_after(&cast->node.entry, &new_cast->node.entry); -- replacement = &new_cast->node; -+ list_add_after(&cast->node.entry, &new_cast->entry); -+ replacement = new_cast; - - if (dst_type->dimx != 1) - { - if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), dst_type->dimx, replacement, &cast->node.loc))) - return false; -- list_add_after(&new_cast->node.entry, &swizzle->node.entry); -- replacement = &swizzle->node; -+ list_add_after(&new_cast->entry, &swizzle->entry); -+ replacement = swizzle; - } - - hlsl_replace_node(&cast->node, replacement); -@@ -949,9 +1300,9 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ - path_node = deref->path[depth].node; - subtype = hlsl_get_element_type_from_path_index(ctx, type, path_node); - -- if (type->type == HLSL_CLASS_STRUCT) -+ if (type->class == HLSL_CLASS_STRUCT) - { -- unsigned int idx = hlsl_ir_constant(path_node)->value[0].u; -+ unsigned int idx = hlsl_ir_constant(path_node)->value.u[0].u; - - for (i = 0; i < idx; ++i) - comp_start += hlsl_type_component_count(type->e.record.fields[i].type); -@@ -966,7 +1317,7 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ - if (path_node->type == HLSL_IR_CONSTANT) - { - copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, -- depth + 1, hlsl_ir_constant(path_node)->value[0].u * subtype_comp_count, writemask); -+ depth + 1, hlsl_ir_constant(path_node)->value.u[0].u * subtype_comp_count, writemask); - } - else - { -@@ -1041,14 +1392,14 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, - var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), - new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); - -- if (instr->data_type->type != HLSL_CLASS_OBJECT) -+ if (instr->data_type->class != 
HLSL_CLASS_OBJECT) - { -- struct hlsl_ir_swizzle *swizzle_node; -+ struct hlsl_ir_node *swizzle_node; - - if (!(swizzle_node = hlsl_new_swizzle(ctx, ret_swizzle, instr_component_count, new_instr, &instr->loc))) - return false; -- list_add_before(&instr->entry, &swizzle_node->node.entry); -- new_instr = &swizzle_node->node; -+ list_add_before(&instr->entry, &swizzle_node->entry); -+ new_instr = swizzle_node; - } - - hlsl_replace_node(instr, new_instr); -@@ -1061,9 +1412,9 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, - { - const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type); - const struct hlsl_ir_var *var = deref->var; -- union hlsl_constant_value values[4] = {0}; -- struct hlsl_ir_constant *cons; -+ struct hlsl_constant_value values = {0}; - unsigned int start, count, i; -+ struct hlsl_ir_node *cons; - - if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count)) - return false; -@@ -1076,21 +1427,17 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, - || value->node->type != HLSL_IR_CONSTANT) - return false; - -- values[i] = hlsl_ir_constant(value->node)->value[value->component]; -+ values.u[i] = hlsl_ir_constant(value->node)->value.u[value->component]; - } - -- if (!(cons = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) -+ if (!(cons = hlsl_new_constant(ctx, instr->data_type, &values, &instr->loc))) - return false; -- cons->value[0] = values[0]; -- cons->value[1] = values[1]; -- cons->value[2] = values[2]; -- cons->value[3] = values[3]; -- list_add_before(&instr->entry, &cons->node.entry); -+ list_add_before(&instr->entry, &cons->entry); - - TRACE("Load from %s[%u-%u]%s turned into a constant %p.\n", - var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), cons); - -- hlsl_replace_node(instr, &cons->node); -+ hlsl_replace_node(instr, cons); - return true; - } - -@@ -1099,7 +1446,7 @@ static bool 
copy_propagation_transform_load(struct hlsl_ctx *ctx, - { - struct hlsl_type *type = load->node.data_type; - -- switch (type->type) -+ switch (type->class) - { - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: -@@ -1220,7 +1567,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s - { - unsigned int writemask = store->writemask; - -- if (store->rhs.node->data_type->type == HLSL_CLASS_OBJECT) -+ if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT) - writemask = VKD3DSP_WRITEMASK_0; - copy_propagation_set_value(var_def, start, writemask, store->rhs.node); - } -@@ -1270,8 +1617,8 @@ static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct - { - struct hlsl_ir_if *iff = hlsl_ir_if(instr); - -- copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); -- copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); -+ copy_propagation_invalidate_from_block(ctx, state, &iff->then_block); -+ copy_propagation_invalidate_from_block(ctx, state, &iff->else_block); - - break; - } -@@ -1301,19 +1648,19 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if - bool progress = false; - - copy_propagation_state_init(ctx, &inner_state, state); -- progress |= copy_propagation_transform_block(ctx, &iff->then_instrs, &inner_state); -+ progress |= copy_propagation_transform_block(ctx, &iff->then_block, &inner_state); - copy_propagation_state_destroy(&inner_state); - - copy_propagation_state_init(ctx, &inner_state, state); -- progress |= copy_propagation_transform_block(ctx, &iff->else_instrs, &inner_state); -+ progress |= copy_propagation_transform_block(ctx, &iff->else_block, &inner_state); - copy_propagation_state_destroy(&inner_state); - - /* Ideally we'd invalidate the outer state looking at what was - * touched in the two inner states, but this doesn't work for - * loops (because we need to know what is invalidated in advance), - * so we need 
copy_propagation_invalidate_from_block() anyway. */ -- copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); -- copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); -+ copy_propagation_invalidate_from_block(ctx, state, &iff->then_block); -+ copy_propagation_invalidate_from_block(ctx, state, &iff->else_block); - - return progress; - } -@@ -1379,7 +1726,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b - return progress; - } - --static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) -+bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) - { - struct copy_propagation_state state; - bool progress; -@@ -1419,7 +1766,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ - { - struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - -- if (!(load->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if (!load->resource.var->is_uniform) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Loaded resource must have a single uniform source."); -@@ -1434,7 +1781,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ - - if (load->sampler.var) - { -- if (!(load->sampler.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if (!load->sampler.var->is_uniform) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Resource load sampler must have a single uniform source."); -@@ -1452,7 +1799,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ - { - struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - -- if (!(store->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if (!store->resource.var->is_uniform) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Accessed resource must have a single uniform source."); -@@ 
-1471,7 +1818,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ - - static bool is_vec1(const struct hlsl_type *type) - { -- return (type->type == HLSL_CLASS_SCALAR) || (type->type == HLSL_CLASS_VECTOR && type->dimx == 1); -+ return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->dimx == 1); - } - - static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -@@ -1505,21 +1852,20 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - static bool split_copy(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, - const struct hlsl_ir_load *load, const unsigned int idx, struct hlsl_type *type) - { -- struct hlsl_ir_store *split_store; -+ struct hlsl_ir_node *split_store, *c; - struct hlsl_ir_load *split_load; -- struct hlsl_ir_constant *c; - - if (!(c = hlsl_new_uint_constant(ctx, idx, &store->node.loc))) - return false; -- list_add_before(&store->node.entry, &c->node.entry); -+ list_add_before(&store->node.entry, &c->entry); - -- if (!(split_load = hlsl_new_load_index(ctx, &load->src, &c->node, &store->node.loc))) -+ if (!(split_load = hlsl_new_load_index(ctx, &load->src, c, &store->node.loc))) - return false; - list_add_before(&store->node.entry, &split_load->node.entry); - -- if (!(split_store = hlsl_new_store_index(ctx, &store->lhs, &c->node, &split_load->node, 0, &store->node.loc))) -+ if (!(split_store = hlsl_new_store_index(ctx, &store->lhs, c, &split_load->node, 0, &store->node.loc))) - return false; -- list_add_before(&store->node.entry, &split_store->node.entry); -+ list_add_before(&store->node.entry, &split_store->entry); - - return true; - } -@@ -1538,7 +1884,7 @@ static bool split_array_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - store = hlsl_ir_store(instr); - rhs = store->rhs.node; - type = rhs->data_type; -- if (type->type != HLSL_CLASS_ARRAY) -+ if (type->class != HLSL_CLASS_ARRAY) - return false; - 
element_type = type->e.array.type; - -@@ -1575,7 +1921,7 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - store = hlsl_ir_store(instr); - rhs = store->rhs.node; - type = rhs->data_type; -- if (type->type != HLSL_CLASS_STRUCT) -+ if (type->class != HLSL_CLASS_STRUCT) - return false; - - if (rhs->type != HLSL_IR_LOAD) -@@ -1614,13 +1960,13 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - store = hlsl_ir_store(instr); - rhs = store->rhs.node; - type = rhs->data_type; -- if (type->type != HLSL_CLASS_MATRIX) -+ if (type->class != HLSL_CLASS_MATRIX) - return false; - element_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); - - if (rhs->type != HLSL_IR_LOAD) - { -- hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type.\n"); -+ hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type."); - return false; - } - -@@ -1649,22 +1995,21 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins - src_type = cast->operands[0].node->data_type; - dst_type = cast->node.data_type; - -- if (src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) -+ if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) - { -- struct hlsl_ir_swizzle *swizzle; -- struct hlsl_ir_expr *new_cast; -+ struct hlsl_ir_node *new_cast, *swizzle; - - dst_vector_type = hlsl_get_vector_type(ctx, dst_type->base_type, src_type->dimx); - /* We need to preserve the cast since it might be doing more than just - * narrowing the vector. 
*/ - if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) - return false; -- list_add_after(&cast->node.entry, &new_cast->node.entry); -- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, &new_cast->node, &cast->node.loc))) -+ list_add_after(&cast->node.entry, &new_cast->entry); -+ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, new_cast, &cast->node.loc))) - return false; -- list_add_after(&new_cast->node.entry, &swizzle->node.entry); -+ list_add_after(&new_cast->entry, &swizzle->entry); - -- hlsl_replace_node(&cast->node, &swizzle->node); -+ hlsl_replace_node(&cast->node, swizzle); - return true; - } - -@@ -1684,8 +2029,7 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - - if (next_instr->type == HLSL_IR_SWIZZLE) - { -- struct hlsl_ir_swizzle *new_swizzle; -- struct hlsl_ir_node *new_instr; -+ struct hlsl_ir_node *new_swizzle; - unsigned int combined_swizzle; - - combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle, -@@ -1695,9 +2039,8 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) - return false; - -- new_instr = &new_swizzle->node; -- list_add_before(&instr->entry, &new_instr->entry); -- hlsl_replace_node(instr, new_instr); -+ list_add_before(&instr->entry, &new_swizzle->entry); -+ hlsl_replace_node(instr, new_swizzle); - return true; - } - -@@ -1725,6 +2068,212 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i - return true; - } - -+static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *idx; -+ struct hlsl_deref *deref; -+ struct hlsl_type *type; -+ unsigned int i; -+ -+ if (instr->type != HLSL_IR_LOAD) -+ return false; -+ -+ deref = 
&hlsl_ir_load(instr)->src; -+ assert(deref->var); -+ -+ if (deref->path_len == 0) -+ return false; -+ -+ type = deref->var->data_type; -+ for (i = 0; i < deref->path_len - 1; ++i) -+ type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); -+ -+ idx = deref->path[deref->path_len - 1].node; -+ -+ if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) -+ { -+ struct hlsl_ir_node *eq, *swizzle, *dot, *c, *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_constant_value value; -+ struct hlsl_ir_load *vector_load; -+ enum hlsl_ir_expr_op op; -+ -+ if (!(vector_load = hlsl_new_load_parent(ctx, deref, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &vector_load->node.entry); -+ -+ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), type->dimx, idx, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &swizzle->entry); -+ -+ value.u[0].u = 0; -+ value.u[1].u = 1; -+ value.u[2].u = 2; -+ value.u[3].u = 3; -+ if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, type->dimx), &value, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &c->entry); -+ -+ operands[0] = swizzle; -+ operands[1] = c; -+ if (!(eq = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, -+ hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, type->dimx), &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &eq->entry); -+ -+ if (!(eq = hlsl_new_cast(ctx, eq, type, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &eq->entry); -+ -+ op = HLSL_OP2_DOT; -+ if (type->dimx == 1) -+ op = type->base_type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; -+ -+ /* Note: We may be creating a DOT for bool vectors here, which we need to lower to -+ * LOGIC_OR + LOGIC_AND. 
*/ -+ operands[0] = &vector_load->node; -+ operands[1] = eq; -+ if (!(dot = hlsl_new_expr(ctx, op, operands, instr->data_type, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &dot->entry); -+ hlsl_replace_node(instr, dot); -+ -+ return true; -+ } -+ -+ return false; -+} -+ -+/* Lower combined samples and sampler variables to synthesized separated textures and samplers. -+ * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ -+static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_resource_load *load; -+ struct vkd3d_string_buffer *name; -+ struct hlsl_ir_var *var; -+ unsigned int i; -+ -+ if (instr->type != HLSL_IR_RESOURCE_LOAD) -+ return false; -+ load = hlsl_ir_resource_load(instr); -+ -+ switch (load->load_type) -+ { -+ case HLSL_RESOURCE_LOAD: -+ case HLSL_RESOURCE_GATHER_RED: -+ case HLSL_RESOURCE_GATHER_GREEN: -+ case HLSL_RESOURCE_GATHER_BLUE: -+ case HLSL_RESOURCE_GATHER_ALPHA: -+ case HLSL_RESOURCE_RESINFO: -+ case HLSL_RESOURCE_SAMPLE_CMP: -+ case HLSL_RESOURCE_SAMPLE_CMP_LZ: -+ case HLSL_RESOURCE_SAMPLE_GRAD: -+ case HLSL_RESOURCE_SAMPLE_INFO: -+ return false; -+ -+ case HLSL_RESOURCE_SAMPLE: -+ case HLSL_RESOURCE_SAMPLE_LOD: -+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -+ break; -+ } -+ if (load->sampler.var) -+ return false; -+ -+ if (!hlsl_type_is_resource(load->resource.var->data_type)) -+ { -+ hlsl_fixme(ctx, &instr->loc, "Lower combined samplers within structs."); -+ return false; -+ } -+ -+ assert(hlsl_type_get_regset(load->resource.var->data_type) == HLSL_REGSET_SAMPLERS); -+ -+ if (!(name = hlsl_get_string_buffer(ctx))) -+ return false; -+ vkd3d_string_buffer_printf(name, "%s", load->resource.var->name); -+ -+ TRACE("Lowering to separate resource %s.\n", debugstr_a(name->buffer)); -+ -+ if (!(var = hlsl_get_var(ctx->globals, name->buffer))) -+ { -+ struct hlsl_type *texture_array_type = hlsl_new_texture_type(ctx, 
load->sampling_dim, -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); -+ -+ /* Create (possibly multi-dimensional) texture array type with the same dims as the sampler array. */ -+ struct hlsl_type *arr_type = load->resource.var->data_type; -+ for (i = 0; i < load->resource.path_len; ++i) -+ { -+ assert(arr_type->class == HLSL_CLASS_ARRAY); -+ texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); -+ arr_type = arr_type->e.array.type; -+ } -+ -+ if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, texture_array_type, &instr->loc, false))) -+ { -+ hlsl_release_string_buffer(ctx, name); -+ return false; -+ } -+ var->is_uniform = 1; -+ var->is_separated_resource = true; -+ -+ list_add_tail(&ctx->extern_vars, &var->extern_entry); -+ } -+ hlsl_release_string_buffer(ctx, name); -+ -+ if (load->sampling_dim != var->data_type->sampler_dim) -+ { -+ hlsl_error(ctx, &load->node.loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, -+ "Cannot split combined samplers from \"%s\" if they have different usage dimensions.", -+ load->resource.var->name); -+ hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, "First use as combined sampler is here."); -+ return false; -+ -+ } -+ -+ hlsl_copy_deref(ctx, &load->sampler, &load->resource); -+ load->resource.var = var; -+ assert(hlsl_deref_get_type(ctx, &load->resource)->base_type == HLSL_TYPE_TEXTURE); -+ assert(hlsl_deref_get_type(ctx, &load->sampler)->base_type == HLSL_TYPE_SAMPLER); -+ -+ return true; -+} -+ -+static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl_ir_var *to_add, -+ enum hlsl_regset regset) -+{ -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(var, list, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->bind_count[regset] < to_add->bind_count[regset]) -+ { -+ list_add_before(&var->extern_entry, &to_add->extern_entry); -+ return; -+ } -+ } -+ -+ list_add_tail(list, &to_add->extern_entry); -+} -+ -+static bool 
sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) -+{ -+ struct list separated_resources; -+ struct hlsl_ir_var *var, *next; -+ -+ list_init(&separated_resources); -+ -+ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_separated_resource) -+ { -+ list_remove(&var->extern_entry); -+ insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_TEXTURES); -+ } -+ } -+ -+ list_move_head(&ctx->extern_vars, &separated_resources); -+ -+ return false; -+} -+ - /* Lower DIV to RCP + MUL. */ - static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -@@ -1737,7 +2286,7 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi - if (expr->op != HLSL_OP2_DIV) - return false; - -- if (!(rcp = hlsl_new_unary_expr(ctx, HLSL_OP1_RCP, expr->operands[1].node, instr->loc))) -+ if (!(rcp = hlsl_new_unary_expr(ctx, HLSL_OP1_RCP, expr->operands[1].node, &instr->loc))) - return false; - list_add_before(&expr->node.entry, &rcp->entry); - expr->op = HLSL_OP2_MUL; -@@ -1758,7 +2307,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c - if (expr->op != HLSL_OP1_SQRT) - return false; - -- if (!(rsq = hlsl_new_unary_expr(ctx, HLSL_OP1_RSQ, expr->operands[0].node, instr->loc))) -+ if (!(rsq = hlsl_new_unary_expr(ctx, HLSL_OP1_RSQ, expr->operands[0].node, &instr->loc))) - return false; - list_add_before(&expr->node.entry, &rsq->entry); - expr->op = HLSL_OP1_RCP; -@@ -1770,9 +2319,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c - /* Lower DP2 to MUL + ADD */ - static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -- struct hlsl_ir_node *arg1, *arg2, *mul, *replacement; -- struct hlsl_ir_swizzle *add_x, *add_y; -- struct hlsl_ir_constant *zero; -+ struct hlsl_ir_node *arg1, *arg2, *mul, *replacement, *zero, *add_x, *add_y; - struct 
hlsl_ir_expr *expr; - - if (instr->type != HLSL_IR_EXPR) -@@ -1791,11 +2338,11 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co - - if (!(zero = hlsl_new_float_constant(ctx, 0.0f, &expr->node.loc))) - return false; -- list_add_before(&instr->entry, &zero->node.entry); -+ list_add_before(&instr->entry, &zero->entry); - - operands[0] = arg1; - operands[1] = arg2; -- operands[2] = &zero->node; -+ operands[2] = zero; - - if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_DP2ADD, operands, instr->data_type, &expr->node.loc))) - return false; -@@ -1808,13 +2355,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co - - if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), instr->data_type->dimx, mul, &expr->node.loc))) - return false; -- list_add_before(&instr->entry, &add_x->node.entry); -+ list_add_before(&instr->entry, &add_x->entry); - - if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), instr->data_type->dimx, mul, &expr->node.loc))) - return false; -- list_add_before(&instr->entry, &add_y->node.entry); -+ list_add_before(&instr->entry, &add_y->entry); - -- if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, &add_x->node, &add_y->node))) -+ if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, add_x, add_y))) - return false; - } - list_add_before(&instr->entry, &replacement->entry); -@@ -1836,7 +2383,7 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co - if (expr->op != HLSL_OP1_ABS) - return false; - -- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, instr->loc))) -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) - return false; - list_add_before(&instr->entry, &neg->entry); - -@@ -1848,10 +2395,104 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co - return true; - } - -+/* Lower ROUND using FRC, ROUND(x) -> ((x + 0.5) - FRC(x + 0.5)). 
*/ -+static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *arg, *neg, *sum, *frc, *half, *replacement; -+ struct hlsl_type *type = instr->data_type; -+ struct hlsl_constant_value half_value; -+ unsigned int i, component_count; -+ struct hlsl_ir_expr *expr; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ -+ expr = hlsl_ir_expr(instr); -+ arg = expr->operands[0].node; -+ if (expr->op != HLSL_OP1_ROUND) -+ return false; -+ -+ component_count = hlsl_type_component_count(type); -+ for (i = 0; i < component_count; ++i) -+ half_value.u[i].f = 0.5f; -+ if (!(half = hlsl_new_constant(ctx, type, &half_value, &expr->node.loc))) -+ return false; -+ -+ list_add_before(&instr->entry, &half->entry); -+ -+ if (!(sum = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, half))) -+ return false; -+ list_add_before(&instr->entry, &sum->entry); -+ -+ if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, sum, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &frc->entry); -+ -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, frc, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &neg->entry); -+ -+ if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, sum, neg))) -+ return false; -+ list_add_before(&instr->entry, &replacement->entry); -+ -+ hlsl_replace_node(instr, replacement); -+ return true; -+} -+ -+/* Use 'movc' for the ternary operator. 
*/ -+static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], *replacement; -+ struct hlsl_ir_node *zero, *cond, *first, *second; -+ struct hlsl_constant_value zero_value = { 0 }; -+ struct hlsl_ir_expr *expr; -+ struct hlsl_type *type; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ -+ expr = hlsl_ir_expr(instr); -+ if (expr->op != HLSL_OP3_TERNARY) -+ return false; -+ -+ cond = expr->operands[0].node; -+ first = expr->operands[1].node; -+ second = expr->operands[2].node; -+ -+ if (cond->data_type->base_type == HLSL_TYPE_FLOAT) -+ { -+ if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc))) -+ return false; -+ list_add_tail(&instr->entry, &zero->entry); -+ -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = zero; -+ operands[1] = cond; -+ type = cond->data_type; -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); -+ if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &cond->entry); -+ } -+ -+ memset(operands, 0, sizeof(operands)); -+ operands[0] = cond; -+ operands[1] = first; -+ operands[2] = second; -+ if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_MOVC, operands, first->data_type, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &replacement->entry); -+ -+ hlsl_replace_node(instr, replacement); -+ return true; -+} -+ - static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - struct hlsl_type *type = instr->data_type, *arg_type; -- struct hlsl_ir_constant *zero; -+ static const struct hlsl_constant_value zero_value; -+ struct hlsl_ir_node *zero; - struct hlsl_ir_expr *expr; - - if (instr->type != HLSL_IR_EXPR) -@@ -1860,7 +2501,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - if (expr->op != HLSL_OP1_CAST) - 
return false; - arg_type = expr->operands[0].node->data_type; -- if (type->type > HLSL_CLASS_VECTOR || arg_type->type > HLSL_CLASS_VECTOR) -+ if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR) - return false; - if (type->base_type != HLSL_TYPE_BOOL) - return false; -@@ -1868,57 +2509,58 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - /* Narrowing casts should have already been lowered. */ - assert(type->dimx == arg_type->dimx); - -- zero = hlsl_new_constant(ctx, arg_type, &instr->loc); -+ zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); - if (!zero) - return false; -- list_add_before(&instr->entry, &zero->node.entry); -+ list_add_before(&instr->entry, &zero->entry); - - expr->op = HLSL_OP2_NEQUAL; -- hlsl_src_from_node(&expr->operands[1], &zero->node); -+ hlsl_src_from_node(&expr->operands[1], zero); - - return true; - } - --struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, -+struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) - { -- struct hlsl_ir_store *store; -+ struct hlsl_block then_block, else_block; -+ struct hlsl_ir_node *iff, *store; - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; -- struct hlsl_ir_if *iff; - - assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); - - if (!(var = hlsl_new_synthetic_var(ctx, "conditional", if_true->data_type, &condition->loc))) - return NULL; - -- if (!(iff = hlsl_new_if(ctx, condition, condition->loc))) -- return NULL; -- list_add_tail(instrs, &iff->node.entry); -+ hlsl_block_init(&then_block); -+ hlsl_block_init(&else_block); - - if (!(store = hlsl_new_simple_store(ctx, var, if_true))) - return NULL; -- list_add_tail(&iff->then_instrs.instrs, &store->node.entry); -+ hlsl_block_add_instr(&then_block, store); - - if (!(store = hlsl_new_simple_store(ctx, 
var, if_false))) - return NULL; -- list_add_tail(&iff->else_instrs.instrs, &store->node.entry); -+ hlsl_block_add_instr(&else_block, store); -+ -+ if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) -+ return NULL; -+ hlsl_block_add_instr(instrs, iff); - -- if (!(load = hlsl_new_var_load(ctx, var, condition->loc))) -+ if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) - return NULL; -- list_add_tail(instrs, &load->node.entry); -+ hlsl_block_add_instr(instrs, &load->node); - -- return load; -+ return &load->node; - } - --static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -- struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg; -+ struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; - struct hlsl_type *type = instr->data_type, *utype; -- struct hlsl_ir_expr *cast1, *cast2, *cast3; -- struct hlsl_ir_constant *high_bit; -+ struct hlsl_constant_value high_bit_value; - struct hlsl_ir_expr *expr; -- struct hlsl_ir_load *cond; - unsigned int i; - - if (instr->type != HLSL_IR_EXPR) -@@ -1928,69 +2570,63 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - arg2 = expr->operands[1].node; - if (expr->op != HLSL_OP2_DIV) - return false; -- if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) -+ if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; - if (type->base_type != HLSL_TYPE_INT) - return false; -- utype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_UINT, type->dimx, type->dimy); -+ utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); - - if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) - return false; -- list_add_before(&instr->entry, &xor->entry); -+ 
hlsl_block_add_instr(block, xor); - -- if (!(high_bit = hlsl_new_constant(ctx, type, &instr->loc))) -- return false; - for (i = 0; i < type->dimx; ++i) -- high_bit->value[i].u = 0x80000000; -- list_add_before(&instr->entry, &high_bit->node.entry); -+ high_bit_value.u[i].u = 0x80000000; -+ if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, high_bit); - -- if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, &high_bit->node))) -+ if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, high_bit))) - return false; -- list_add_before(&instr->entry, &and->entry); -+ hlsl_block_add_instr(block, and); - -- if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, instr->loc))) -+ if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) - return false; -- list_add_before(&instr->entry, &abs1->entry); -+ hlsl_block_add_instr(block, abs1); - - if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast1->node.entry); -+ hlsl_block_add_instr(block, cast1); - -- if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, instr->loc))) -+ if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) - return false; -- list_add_before(&instr->entry, &abs2->entry); -+ hlsl_block_add_instr(block, abs2); - - if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast2->node.entry); -+ hlsl_block_add_instr(block, cast2); - -- if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &cast1->node, &cast2->node))) -+ if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, cast1, cast2))) - return false; -- list_add_before(&instr->entry, &div->entry); -+ hlsl_block_add_instr(block, div); - - if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast3->node.entry); -- -- if (!(neg = hlsl_new_unary_expr(ctx, 
HLSL_OP1_NEG, &cast3->node, instr->loc))) -- return false; -- list_add_before(&instr->entry, &neg->entry); -+ hlsl_block_add_instr(block, cast3); - -- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, &cast3->node))) -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) - return false; -- hlsl_replace_node(instr, &cond->node); -+ hlsl_block_add_instr(block, neg); - -- return true; -+ return hlsl_add_conditional(ctx, block, and, neg, cast3); - } - --static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { -- struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg; -+ struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit; - struct hlsl_type *type = instr->data_type, *utype; -- struct hlsl_ir_expr *cast1, *cast2, *cast3; -- struct hlsl_ir_constant *high_bit; -+ struct hlsl_constant_value high_bit_value; - struct hlsl_ir_expr *expr; -- struct hlsl_ir_load *cond; - unsigned int i; - - if (instr->type != HLSL_IR_EXPR) -@@ -2000,55 +2636,51 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - arg2 = expr->operands[1].node; - if (expr->op != HLSL_OP2_MOD) - return false; -- if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) -+ if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; - if (type->base_type != HLSL_TYPE_INT) - return false; -- utype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_UINT, type->dimx, type->dimy); -+ utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); - -- if (!(high_bit = hlsl_new_constant(ctx, type, &instr->loc))) -- return false; - for (i = 0; i < type->dimx; ++i) -- high_bit->value[i].u = 0x80000000; -- list_add_before(&instr->entry, &high_bit->node.entry); -+ high_bit_value.u[i].u 
= 0x80000000; -+ if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, high_bit); - -- if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, &high_bit->node))) -+ if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, high_bit))) - return false; -- list_add_before(&instr->entry, &and->entry); -+ hlsl_block_add_instr(block, and); - -- if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, instr->loc))) -+ if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) - return false; -- list_add_before(&instr->entry, &abs1->entry); -+ hlsl_block_add_instr(block, abs1); - - if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast1->node.entry); -+ hlsl_block_add_instr(block, cast1); - -- if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, instr->loc))) -+ if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) - return false; -- list_add_before(&instr->entry, &abs2->entry); -+ hlsl_block_add_instr(block, abs2); - - if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast2->node.entry); -+ hlsl_block_add_instr(block, cast2); - -- if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, &cast1->node, &cast2->node))) -+ if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, cast1, cast2))) - return false; -- list_add_before(&instr->entry, &div->entry); -+ hlsl_block_add_instr(block, div); - - if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) - return false; -- list_add_before(&instr->entry, &cast3->node.entry); -- -- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, &cast3->node, instr->loc))) -- return false; -- list_add_before(&instr->entry, &neg->entry); -+ hlsl_block_add_instr(block, cast3); - -- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, &cast3->node))) -+ if (!(neg = hlsl_new_unary_expr(ctx, 
HLSL_OP1_NEG, cast3, &instr->loc))) - return false; -- hlsl_replace_node(instr, &cond->node); -+ hlsl_block_add_instr(block, neg); - -- return true; -+ return hlsl_add_conditional(ctx, block, and, neg, cast3); - } - - static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -@@ -2063,14 +2695,14 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void - - if (expr->op != HLSL_OP1_ABS) - return false; -- if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) -+ if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; - if (type->base_type != HLSL_TYPE_INT) - return false; - - arg = expr->operands[0].node; - -- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, instr->loc))) -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) - return false; - list_add_before(&instr->entry, &neg->entry); - -@@ -2080,12 +2712,63 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void - return true; - } - --static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -- struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc; -+ struct hlsl_ir_node *arg1, *arg2, *mult, *comps[4] = {0}, *res; -+ struct hlsl_type *type = instr->data_type; -+ struct hlsl_ir_expr *expr; -+ unsigned int i, dimx; -+ bool is_bool; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ -+ if (expr->op != HLSL_OP2_DOT) -+ return false; -+ -+ if (type->base_type == HLSL_TYPE_INT || type->base_type == HLSL_TYPE_UINT -+ || type->base_type == HLSL_TYPE_BOOL) -+ { -+ arg1 = expr->operands[0].node; -+ arg2 = expr->operands[1].node; -+ assert(arg1->data_type->dimx == arg2->data_type->dimx); -+ dimx = arg1->data_type->dimx; -+ is_bool = type->base_type == 
HLSL_TYPE_BOOL; -+ -+ if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) -+ return false; -+ list_add_before(&instr->entry, &mult->entry); -+ -+ for (i = 0; i < dimx; ++i) -+ { -+ unsigned int s = hlsl_swizzle_from_writemask(1 << i); -+ -+ if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, mult, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &comps[i]->entry); -+ } -+ -+ res = comps[0]; -+ for (i = 1; i < dimx; ++i) -+ { -+ if (!(res = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_OR : HLSL_OP2_ADD, res, comps[i]))) -+ return false; -+ list_add_before(&instr->entry, &res->entry); -+ } -+ -+ hlsl_replace_node(instr, res); -+ return true; -+ } -+ -+ return false; -+} -+ -+static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one, *mul3; - struct hlsl_type *type = instr->data_type, *btype; -- struct hlsl_ir_constant *one; -- struct hlsl_ir_load *cond; -+ struct hlsl_constant_value one_value; - struct hlsl_ir_expr *expr; - unsigned int i; - -@@ -2096,55 +2779,108 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - arg2 = expr->operands[1].node; - if (expr->op != HLSL_OP2_MOD) - return false; -- if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) -+ if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) - return false; - if (type->base_type != HLSL_TYPE_FLOAT) - return false; -- btype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_BOOL, type->dimx, type->dimy); -+ btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); - - if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) - return false; -- list_add_before(&instr->entry, &mul1->entry); -+ hlsl_block_add_instr(block, mul1); - -- if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, 
instr->loc))) -+ if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, &instr->loc))) - return false; -- list_add_before(&instr->entry, &neg1->entry); -+ hlsl_block_add_instr(block, neg1); - - if (!(ge = hlsl_new_binary_expr(ctx, HLSL_OP2_GEQUAL, mul1, neg1))) - return false; - ge->data_type = btype; -- list_add_before(&instr->entry, &ge->entry); -+ hlsl_block_add_instr(block, ge); - -- if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, instr->loc))) -+ if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, &instr->loc))) - return false; -- list_add_before(&instr->entry, &neg2->entry); -+ hlsl_block_add_instr(block, neg2); - -- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, ge, arg2, neg2))) -+ if (!(cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2))) - return false; - -- if (!(one = hlsl_new_constant(ctx, type, &instr->loc))) -- return false; - for (i = 0; i < type->dimx; ++i) -- one->value[i].f = 1.0f; -- list_add_before(&instr->entry, &one->node.entry); -+ one_value.u[i].f = 1.0f; -+ if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, one); - -- if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &one->node, &cond->node))) -+ if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, one, cond))) - return false; -- list_add_before(&instr->entry, &div->entry); -+ hlsl_block_add_instr(block, div); - - if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, div, arg1))) - return false; -- list_add_before(&instr->entry, &mul2->entry); -+ hlsl_block_add_instr(block, mul2); - -- if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, instr->loc))) -+ if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, &instr->loc))) - return false; -- list_add_before(&instr->entry, &frc->entry); -+ hlsl_block_add_instr(block, frc); - -- expr->op = HLSL_OP2_MUL; -- hlsl_src_remove(&expr->operands[0]); -- hlsl_src_remove(&expr->operands[1]); -- hlsl_src_from_node(&expr->operands[0], frc); -- 
hlsl_src_from_node(&expr->operands[1], &cond->node); -+ if (!(mul3 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, frc, cond))) -+ return false; -+ hlsl_block_add_instr(block, mul3); -+ -+ return true; -+} -+ -+static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; -+ static const struct hlsl_constant_value zero_value; -+ struct hlsl_type *arg_type, *cmp_type; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; -+ struct hlsl_ir_jump *jump; -+ struct hlsl_block block; -+ unsigned int i, count; -+ -+ if (instr->type != HLSL_IR_JUMP) -+ return false; -+ jump = hlsl_ir_jump(instr); -+ if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) -+ return false; -+ -+ hlsl_block_init(&block); -+ -+ arg_type = jump->condition.node->data_type; -+ if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(&block, zero); -+ -+ operands[0] = jump->condition.node; -+ operands[1] = zero; -+ cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); -+ if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(&block, cmp); -+ -+ if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(&block, bool_false); -+ -+ or = bool_false; -+ -+ count = hlsl_type_component_count(cmp_type); -+ for (i = 0; i < count; ++i) -+ { -+ if (!(load = hlsl_add_load_component(ctx, &block, cmp, i, &instr->loc))) -+ return false; -+ -+ if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) -+ return NULL; -+ hlsl_block_add_instr(&block, or); -+ } -+ -+ list_move_tail(&instr->entry, &block.instrs); -+ hlsl_src_remove(&jump->condition); -+ hlsl_src_from_node(&jump->condition, or); -+ jump->type = HLSL_IR_JUMP_DISCARD_NZ; - - return true; - } -@@ 
-2155,6 +2891,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - case HLSL_IR_CONSTANT: - case HLSL_IR_EXPR: -+ case HLSL_IR_INDEX: - case HLSL_IR_LOAD: - case HLSL_IR_RESOURCE_LOAD: - case HLSL_IR_SWIZZLE: -@@ -2204,8 +2941,8 @@ static unsigned int index_instructions(struct hlsl_block *block, unsigned int in - if (instr->type == HLSL_IR_IF) - { - struct hlsl_ir_if *iff = hlsl_ir_if(instr); -- index = index_instructions(&iff->then_instrs, index); -- index = index_instructions(&iff->else_instrs, index); -+ index = index_instructions(&iff->then_block, index); -+ index = index_instructions(&iff->else_block, index); - } - else if (instr->type == HLSL_IR_LOOP) - { -@@ -2262,9 +2999,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - continue; - regset = hlsl_type_get_regset(var->data_type); - -- if (var->reg_reservation.type) -+ if (var->reg_reservation.reg_type && var->regs[regset].allocation_size) - { -- if (var->reg_reservation.type != get_regset_name(regset)) -+ if (var->reg_reservation.reg_type != get_regset_name(regset)) - { - struct vkd3d_string_buffer *type_string; - -@@ -2277,8 +3014,10 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - else - { - var->regs[regset].allocated = true; -- var->regs[regset].id = var->reg_reservation.index; -- TRACE("Allocated reserved %s to %c%u.\n", var->name, var->reg_reservation.type, var->reg_reservation.index); -+ var->regs[regset].id = var->reg_reservation.reg_index; -+ TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, -+ var->reg_reservation.reg_index, var->reg_reservation.reg_type, -+ var->reg_reservation.reg_index + var->regs[regset].allocation_size); - } - } - } -@@ -2286,9 +3025,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - - /* Compute the earliest and latest liveness for each variable. 
In the case that - * a variable is accessed inside of a loop, we promote its liveness to extend -- * to at least the range of the entire loop. Note that we don't need to do this -- * for anonymous nodes, since there's currently no way to use a node which was -- * calculated in an earlier iteration of the loop. */ -+ * to at least the range of the entire loop. We also do this for nodes, so that -+ * nodes produced before the loop have their temp register protected from being -+ * overridden after the last read within an iteration. */ - static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop_first, unsigned int loop_last) - { - struct hlsl_ir_node *instr; -@@ -2296,7 +3035,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { -- const unsigned int var_last_read = loop_last ? max(instr->index, loop_last) : instr->index; -+ const unsigned int last_read = loop_last ? max(instr->index, loop_last) : instr->index; - - switch (instr->type) - { -@@ -2311,9 +3050,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - var = store->lhs.var; - if (!var->first_write) - var->first_write = loop_first ? 
min(instr->index, loop_first) : instr->index; -- store->rhs.node->last_read = instr->index; -+ store->rhs.node->last_read = last_read; - if (store->lhs.offset.node) -- store->lhs.offset.node->last_read = instr->index; -+ store->lhs.offset.node->last_read = last_read; - break; - } - case HLSL_IR_EXPR: -@@ -2322,16 +3061,16 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(expr->operands) && expr->operands[i].node; ++i) -- expr->operands[i].node->last_read = instr->index; -+ expr->operands[i].node->last_read = last_read; - break; - } - case HLSL_IR_IF: - { - struct hlsl_ir_if *iff = hlsl_ir_if(instr); - -- compute_liveness_recurse(&iff->then_instrs, loop_first, loop_last); -- compute_liveness_recurse(&iff->else_instrs, loop_first, loop_last); -- iff->condition.node->last_read = instr->index; -+ compute_liveness_recurse(&iff->then_block, loop_first, loop_last); -+ compute_liveness_recurse(&iff->else_block, loop_first, loop_last); -+ iff->condition.node->last_read = last_read; - break; - } - case HLSL_IR_LOAD: -@@ -2339,9 +3078,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - struct hlsl_ir_load *load = hlsl_ir_load(instr); - - var = load->src.var; -- var->last_read = max(var->last_read, var_last_read); -+ var->last_read = max(var->last_read, last_read); - if (load->src.offset.node) -- load->src.offset.node->last_read = instr->index; -+ load->src.offset.node->last_read = last_read; - break; - } - case HLSL_IR_LOOP: -@@ -2357,22 +3096,31 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - - var = load->resource.var; -- var->last_read = max(var->last_read, var_last_read); -+ var->last_read = max(var->last_read, last_read); - if (load->resource.offset.node) -- load->resource.offset.node->last_read = instr->index; -+ 
load->resource.offset.node->last_read = last_read; - - if ((var = load->sampler.var)) - { -- var->last_read = max(var->last_read, var_last_read); -+ var->last_read = max(var->last_read, last_read); - if (load->sampler.offset.node) -- load->sampler.offset.node->last_read = instr->index; -+ load->sampler.offset.node->last_read = last_read; - } - -- load->coords.node->last_read = instr->index; -+ if (load->coords.node) -+ load->coords.node->last_read = last_read; - if (load->texel_offset.node) -- load->texel_offset.node->last_read = instr->index; -+ load->texel_offset.node->last_read = last_read; - if (load->lod.node) -- load->lod.node->last_read = instr->index; -+ load->lod.node->last_read = last_read; -+ if (load->ddx.node) -+ load->ddx.node->last_read = last_read; -+ if (load->ddy.node) -+ load->ddy.node->last_read = last_read; -+ if (load->sample_index.node) -+ load->sample_index.node->last_read = last_read; -+ if (load->cmp.node) -+ load->cmp.node->last_read = last_read; - break; - } - case HLSL_IR_RESOURCE_STORE: -@@ -2380,22 +3128,37 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - - var = store->resource.var; -- var->last_read = max(var->last_read, var_last_read); -+ var->last_read = max(var->last_read, last_read); - if (store->resource.offset.node) -- store->resource.offset.node->last_read = instr->index; -- store->coords.node->last_read = instr->index; -- store->value.node->last_read = instr->index; -+ store->resource.offset.node->last_read = last_read; -+ store->coords.node->last_read = last_read; -+ store->value.node->last_read = last_read; - break; - } - case HLSL_IR_SWIZZLE: - { - struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); - -- swizzle->val.node->last_read = instr->index; -+ swizzle->val.node->last_read = last_read; -+ break; -+ } -+ case HLSL_IR_INDEX: -+ { -+ struct hlsl_ir_index *index = hlsl_ir_index(instr); -+ -+ 
index->val.node->last_read = last_read; -+ index->idx.node->last_read = last_read; - break; - } -- case HLSL_IR_CONSTANT: - case HLSL_IR_JUMP: -+ { -+ struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); -+ -+ if (jump->condition.node) -+ jump->condition.node->last_read = last_read; -+ break; -+ } -+ case HLSL_IR_CONSTANT: - break; - } - } -@@ -2426,127 +3189,142 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - compute_liveness_recurse(&entry_func->body, 0, 0); - } - --struct liveness -+struct register_allocator - { -- size_t size; -- uint32_t reg_count; -- struct -+ size_t count, capacity; -+ -+ /* Highest register index that has been allocated. -+ * Used to declare sm4 temp count. */ -+ uint32_t max_reg; -+ -+ struct allocation - { -- /* 0 if not live yet. */ -- unsigned int last_read; -- } *regs; -+ uint32_t reg; -+ unsigned int writemask; -+ unsigned int first_write, last_read; -+ } *allocations; - }; - --static unsigned int get_available_writemask(struct liveness *liveness, -- unsigned int first_write, unsigned int component_idx, unsigned int reg_size) -+static unsigned int get_available_writemask(const struct register_allocator *allocator, -+ unsigned int first_write, unsigned int last_read, uint32_t reg_idx) - { -- unsigned int i, writemask = 0, count = 0; -+ unsigned int writemask = VKD3DSP_WRITEMASK_ALL; -+ size_t i; - -- for (i = 0; i < 4; ++i) -+ for (i = 0; i < allocator->count; ++i) - { -- if (liveness->regs[component_idx + i].last_read <= first_write) -- { -- writemask |= 1u << i; -- if (++count == reg_size) -- return writemask; -- } -+ const struct allocation *allocation = &allocator->allocations[i]; -+ -+ /* We do not overlap if first write == last read: -+ * this is the case where we are allocating the result of that -+ * expression, e.g. "add r0, r0, r1". 
*/ -+ -+ if (allocation->reg == reg_idx -+ && first_write < allocation->last_read && last_read > allocation->first_write) -+ writemask &= ~allocation->writemask; -+ -+ if (!writemask) -+ break; - } - -- return 0; -+ return writemask; - } - --static bool resize_liveness(struct hlsl_ctx *ctx, struct liveness *liveness, size_t new_count) -+static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, -+ uint32_t reg_idx, unsigned int writemask, unsigned int first_write, unsigned int last_read) - { -- size_t old_capacity = liveness->size; -+ struct allocation *allocation; - -- if (!hlsl_array_reserve(ctx, (void **)&liveness->regs, &liveness->size, new_count, sizeof(*liveness->regs))) -- return false; -+ if (!hlsl_array_reserve(ctx, (void **)&allocator->allocations, &allocator->capacity, -+ allocator->count + 1, sizeof(*allocator->allocations))) -+ return; - -- if (liveness->size > old_capacity) -- memset(liveness->regs + old_capacity, 0, (liveness->size - old_capacity) * sizeof(*liveness->regs)); -- return true; -+ allocation = &allocator->allocations[allocator->count++]; -+ allocation->reg = reg_idx; -+ allocation->writemask = writemask; -+ allocation->first_write = first_write; -+ allocation->last_read = last_read; -+ -+ allocator->max_reg = max(allocator->max_reg, reg_idx); - } - - /* reg_size is the number of register components to be reserved, while component_count is the number - * of components for the register's writemask. In SM1, floats and vectors allocate the whole - * register, even if they don't use it completely. 
*/ --static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct liveness *liveness, -+static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, unsigned int reg_size, - unsigned int component_count) - { -- unsigned int component_idx, writemask, i; - struct hlsl_reg ret = {0}; -+ unsigned int writemask; -+ uint32_t reg_idx; - - assert(component_count <= reg_size); - -- for (component_idx = 0; component_idx < liveness->size; component_idx += 4) -+ for (reg_idx = 0;; ++reg_idx) - { -- if ((writemask = get_available_writemask(liveness, first_write, component_idx, reg_size))) -+ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx); -+ -+ if (vkd3d_popcount(writemask) >= reg_size) -+ { -+ writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1); - break; -+ } - } -- if (component_idx == liveness->size) -- { -- if (!resize_liveness(ctx, liveness, component_idx + 4)) -- return ret; -- writemask = (1u << reg_size) - 1; -- } -- for (i = 0; i < 4; ++i) -- { -- if (writemask & (1u << i)) -- liveness->regs[component_idx + i].last_read = last_read; -- } -- ret.id = component_idx / 4; -+ -+ record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); -+ -+ ret.id = reg_idx; -+ ret.allocation_size = 1; - ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); - ret.allocated = true; -- liveness->reg_count = max(liveness->reg_count, ret.id + 1); - return ret; - } - --static bool is_range_available(struct liveness *liveness, unsigned int first_write, -- unsigned int component_idx, unsigned int reg_size) -+static bool is_range_available(const struct register_allocator *allocator, -+ unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) - { -- unsigned int i; -+ uint32_t i; - -- for (i = 0; i < reg_size; i += 4) -+ for (i = 0; i < (reg_size / 4); ++i) - { -- if 
(!get_available_writemask(liveness, first_write, component_idx + i, 4)) -+ if (get_available_writemask(allocator, first_write, last_read, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) - return false; - } - return true; - } - --static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct liveness *liveness, -+static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, unsigned int reg_size) - { -- unsigned int i, component_idx; - struct hlsl_reg ret = {0}; -+ uint32_t reg_idx; -+ unsigned int i; - -- for (component_idx = 0; component_idx < liveness->size; component_idx += 4) -+ for (reg_idx = 0;; ++reg_idx) - { -- if (is_range_available(liveness, first_write, component_idx, -- min(reg_size, liveness->size - component_idx))) -+ if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size)) - break; - } -- if (!resize_liveness(ctx, liveness, component_idx + reg_size)) -- return ret; - -- for (i = 0; i < reg_size; ++i) -- liveness->regs[component_idx + i].last_read = last_read; -- ret.id = component_idx / 4; -+ for (i = 0; i < reg_size / 4; ++i) -+ record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); -+ -+ ret.id = reg_idx; -+ ret.allocation_size = align(reg_size, 4) / 4; - ret.allocated = true; -- liveness->reg_count = max(liveness->reg_count, ret.id + align(reg_size, 4)); - return ret; - } - --static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct liveness *liveness, -+static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, const struct hlsl_type *type) - { - unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; - -- if (type->type <= HLSL_CLASS_VECTOR) -- return allocate_register(ctx, liveness, first_write, last_read, reg_size, type->dimx); -+ if (type->class <= 
HLSL_CLASS_VECTOR) -+ return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx); - else -- return allocate_range(ctx, liveness, first_write, last_read, reg_size); -+ return allocate_range(ctx, allocator, first_write, last_read, reg_size); - } - - static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) -@@ -2565,14 +3343,112 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct - return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); - } - --static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct liveness *liveness) -+static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_resource_load *load; -+ struct hlsl_ir_var *var; -+ enum hlsl_regset regset; -+ unsigned int index; -+ -+ if (instr->type != HLSL_IR_RESOURCE_LOAD) -+ return false; -+ -+ load = hlsl_ir_resource_load(instr); -+ var = load->resource.var; -+ -+ regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); -+ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) -+ return false; -+ -+ if (regset == HLSL_REGSET_SAMPLERS) -+ { -+ enum hlsl_sampler_dim dim; -+ -+ assert(!load->sampler.var); -+ -+ dim = var->objects_usage[regset][index].sampler_dim; -+ if (dim != load->sampling_dim) -+ { -+ if (dim == HLSL_SAMPLER_DIM_GENERIC) -+ { -+ var->objects_usage[regset][index].first_sampler_dim_loc = instr->loc; -+ } -+ else -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, -+ "Inconsistent generic sampler usage dimension."); -+ hlsl_note(ctx, &var->objects_usage[regset][index].first_sampler_dim_loc, -+ VKD3D_SHADER_LOG_ERROR, "First use is here."); -+ return false; -+ } -+ } -+ } -+ var->objects_usage[regset][index].sampler_dim = load->sampling_dim; -+ -+ return false; -+} -+ -+static bool 
track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_resource_load *load; -+ struct hlsl_ir_var *var; -+ enum hlsl_regset regset; -+ unsigned int index; -+ -+ if (instr->type != HLSL_IR_RESOURCE_LOAD) -+ return false; -+ -+ load = hlsl_ir_resource_load(instr); -+ var = load->resource.var; -+ -+ regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); -+ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) -+ return false; -+ -+ var->objects_usage[regset][index].used = true; -+ var->bind_count[regset] = max(var->bind_count[regset], index + 1); -+ if (load->sampler.var) -+ { -+ var = load->sampler.var; -+ if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) -+ return false; -+ -+ var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; -+ var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); -+ } -+ -+ return false; -+} -+ -+static void calculate_resource_register_counts(struct hlsl_ctx *ctx) -+{ -+ struct hlsl_ir_var *var; -+ struct hlsl_type *type; -+ unsigned int k; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ type = var->data_type; -+ -+ for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) -+ { -+ bool is_separated = var->is_separated_resource; -+ -+ if (var->bind_count[k] > 0) -+ var->regs[k].allocation_size = (k == HLSL_REGSET_SAMPLERS || is_separated) ? 
var->bind_count[k] : type->reg_size[k]; -+ } -+ } -+} -+ -+static void allocate_variable_temp_register(struct hlsl_ctx *ctx, -+ struct hlsl_ir_var *var, struct register_allocator *allocator) - { - if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) - return; - - if (!var->regs[HLSL_REGSET_NUMERIC].allocated && var->last_read) - { -- var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, liveness, -+ var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator, - var->first_write, var->last_read, var->data_type); - - TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r', -@@ -2580,15 +3456,20 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir - } - } - --static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) -+static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, -+ struct hlsl_block *block, struct register_allocator *allocator) - { - struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { -+ /* In SM4 all constants are inlined. 
*/ -+ if (ctx->profile->major_version >= 4 && instr->type == HLSL_IR_CONSTANT) -+ continue; -+ - if (!instr->reg.allocated && instr->last_read) - { -- instr->reg = allocate_numeric_registers_for_type(ctx, liveness, instr->index, instr->last_read, -+ instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, - instr->data_type); - TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, - debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); -@@ -2599,8 +3480,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl - case HLSL_IR_IF: - { - struct hlsl_ir_if *iff = hlsl_ir_if(instr); -- allocate_temp_registers_recurse(ctx, &iff->then_instrs, liveness); -- allocate_temp_registers_recurse(ctx, &iff->else_instrs, liveness); -+ allocate_temp_registers_recurse(ctx, &iff->then_block, allocator); -+ allocate_temp_registers_recurse(ctx, &iff->else_block, allocator); - break; - } - -@@ -2609,21 +3490,21 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl - struct hlsl_ir_load *load = hlsl_ir_load(instr); - /* We need to at least allocate a variable for undefs. - * FIXME: We should probably find a way to remove them instead. 
*/ -- allocate_variable_temp_register(ctx, load->src.var, liveness); -+ allocate_variable_temp_register(ctx, load->src.var, allocator); - break; - } - - case HLSL_IR_LOOP: - { - struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); -- allocate_temp_registers_recurse(ctx, &loop->body, liveness); -+ allocate_temp_registers_recurse(ctx, &loop->body, allocator); - break; - } - - case HLSL_IR_STORE: - { - struct hlsl_ir_store *store = hlsl_ir_store(instr); -- allocate_variable_temp_register(ctx, store->lhs.var, liveness); -+ allocate_variable_temp_register(ctx, store->lhs.var, allocator); - break; - } - -@@ -2633,9 +3514,33 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl - } - } - --static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) -+static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f) - { - struct hlsl_constant_defs *defs = &ctx->constant_defs; -+ struct hlsl_constant_register *reg; -+ size_t i; -+ -+ for (i = 0; i < defs->count; ++i) -+ { -+ reg = &defs->regs[i]; -+ if (reg->index == (component_index / 4)) -+ { -+ reg->value.f[component_index % 4] = f; -+ return; -+ } -+ } -+ -+ if (!hlsl_array_reserve(ctx, (void **)&defs->regs, &defs->size, defs->count + 1, sizeof(*defs->regs))) -+ return; -+ reg = &defs->regs[defs->count++]; -+ memset(reg, 0, sizeof(*reg)); -+ reg->index = component_index / 4; -+ reg->value.f[component_index % 4] = f; -+} -+ -+static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, -+ struct hlsl_block *block, struct register_allocator *allocator) -+{ - struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -@@ -2646,66 +3551,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b - { - struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - const struct hlsl_type *type = instr->data_type; -- unsigned int x, y, i, 
writemask, end_reg; -- unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int x, i; - -- constant->reg = allocate_numeric_registers_for_type(ctx, liveness, 1, UINT_MAX, type); -+ constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); - -- if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, -- constant->reg.id + reg_size / 4, sizeof(*defs->values))) -- return; -- end_reg = constant->reg.id + reg_size / 4; -- if (end_reg > defs->count) -- { -- memset(&defs->values[defs->count], 0, sizeof(*defs->values) * (end_reg - defs->count)); -- defs->count = end_reg; -- } -+ assert(type->class <= HLSL_CLASS_LAST_NUMERIC); -+ assert(type->dimy == 1); -+ assert(constant->reg.writemask); - -- assert(type->type <= HLSL_CLASS_LAST_NUMERIC); -+ for (x = 0, i = 0; x < 4; ++x) -+ { -+ const union hlsl_constant_value_component *value; -+ float f; - -- if (!(writemask = constant->reg.writemask)) -- writemask = (1u << type->dimx) - 1; -+ if (!(constant->reg.writemask & (1u << x))) -+ continue; -+ value = &constant->value.u[i++]; - -- for (y = 0; y < type->dimy; ++y) -- { -- for (x = 0, i = 0; x < 4; ++x) -+ switch (type->base_type) - { -- const union hlsl_constant_value *value; -- float f; -- -- if (!(writemask & (1u << x))) -- continue; -- value = &constant->value[i++]; -- -- switch (type->base_type) -- { -- case HLSL_TYPE_BOOL: -- f = !!value->u; -- break; -- -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- f = value->f; -- break; -- -- case HLSL_TYPE_INT: -- f = value->i; -- break; -- -- case HLSL_TYPE_UINT: -- f = value->u; -- break; -- -- case HLSL_TYPE_DOUBLE: -- FIXME("Double constant.\n"); -- return; -- -- default: -- vkd3d_unreachable(); -- } -- defs->values[constant->reg.id + y].f[x] = f; -+ case HLSL_TYPE_BOOL: -+ f = !!value->u; -+ break; -+ -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ f = value->f; -+ 
break; -+ -+ case HLSL_TYPE_INT: -+ f = value->i; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ f = value->u; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ FIXME("Double constant.\n"); -+ return; -+ -+ default: -+ vkd3d_unreachable(); - } -+ -+ record_constant(ctx, constant->reg.id * 4 + x, f); - } - - break; -@@ -2714,15 +3605,15 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b - case HLSL_IR_IF: - { - struct hlsl_ir_if *iff = hlsl_ir_if(instr); -- allocate_const_registers_recurse(ctx, &iff->then_instrs, liveness); -- allocate_const_registers_recurse(ctx, &iff->else_instrs, liveness); -+ allocate_const_registers_recurse(ctx, &iff->then_block, allocator); -+ allocate_const_registers_recurse(ctx, &iff->else_block, allocator); - break; - } - - case HLSL_IR_LOOP: - { - struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); -- allocate_const_registers_recurse(ctx, &loop->body, liveness); -+ allocate_const_registers_recurse(ctx, &loop->body, allocator); - break; - } - -@@ -2734,11 +3625,9 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b - - static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -- struct liveness liveness = {0}; -+ struct register_allocator allocator = {0}; - struct hlsl_ir_var *var; - -- allocate_const_registers_recurse(ctx, &entry_func->body, &liveness); -- - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->last_read) -@@ -2748,12 +3637,16 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - if (reg_size == 0) - continue; - -- var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &liveness, -+ var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, - 1, UINT_MAX, var->data_type); - TRACE("Allocated %s to %s.\n", var->name, - debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); - } - } -+ 
-+ allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); -+ -+ vkd3d_free(allocator.allocations); - } - - /* Simple greedy temporary register allocation pass that just assigns a unique -@@ -2762,15 +3655,33 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - * does not handle constants. */ - static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -- struct liveness liveness = {0}; -- allocate_temp_registers_recurse(ctx, &entry_func->body, &liveness); -- ctx->temp_count = liveness.reg_count; -- vkd3d_free(liveness.regs); -+ struct register_allocator allocator = {0}; -+ -+ /* ps_1_* outputs are special and go in temp register 0. */ -+ if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ { -+ size_t i; -+ -+ for (i = 0; i < entry_func->parameters.count; ++i) -+ { -+ const struct hlsl_ir_var *var = entry_func->parameters.vars[i]; -+ -+ if (var->is_output_semantic) -+ { -+ record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); -+ break; -+ } -+ } -+ } -+ -+ allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); -+ ctx->temp_count = allocator.max_reg + 1; -+ vkd3d_free(allocator.allocations); - } - - static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) - { -- static const char *shader_names[] = -+ static const char *const shader_names[] = - { - [VKD3D_SHADER_TYPE_PIXEL] = "Pixel", - [VKD3D_SHADER_TYPE_VERTEX] = "Vertex", -@@ -2791,7 +3702,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - D3DDECLUSAGE usage; - uint32_t usage_idx; - -- if (!hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) -+ /* ps_1_* outputs are special and go in temp register 0. 
*/ -+ if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ return; -+ -+ builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, ®); -+ if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Invalid semantic '%s'.", var->semantic.name); -@@ -2800,8 +3716,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - - if ((!output && !var->last_read) || (output && !var->first_write)) - return; -- -- builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, ®); - } - else - { -@@ -2827,6 +3741,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - { - var->regs[HLSL_REGSET_NUMERIC].allocated = true; - var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; -+ var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; - var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; - TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 
'o' : 'v', - var->regs[HLSL_REGSET_NUMERIC], var->data_type)); -@@ -2853,23 +3768,117 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 - - LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) - { -- if (buffer->used_size && buffer->reservation.type == 'b' && buffer->reservation.index == index) -+ if (buffer->used_size && buffer->reservation.reg_type == 'b' && buffer->reservation.reg_index == index) - return buffer; - } - return NULL; - } - --static void calculate_buffer_offset(struct hlsl_ir_var *var) -+static void calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) - { -+ unsigned int var_reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ enum hlsl_type_class var_class = var->data_type->class; - struct hlsl_buffer *buffer = var->buffer; - -- buffer->size = hlsl_type_get_sm4_offset(var->data_type, buffer->size); -+ if (var->reg_reservation.offset_type == 'c') -+ { -+ if (var->reg_reservation.offset_index % 4) -+ { -+ if (var_class == HLSL_CLASS_MATRIX) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "packoffset() reservations with matrix types must be aligned with the beginning of a register."); -+ } -+ else if (var_class == HLSL_CLASS_ARRAY) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "packoffset() reservations with array types must be aligned with the beginning of a register."); -+ } -+ else if (var_class == HLSL_CLASS_STRUCT) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "packoffset() reservations with struct types must be aligned with the beginning of a register."); -+ } -+ else if (var_class == HLSL_CLASS_VECTOR) -+ { -+ unsigned int aligned_offset = hlsl_type_get_sm4_offset(var->data_type, var->reg_reservation.offset_index); -+ -+ if (var->reg_reservation.offset_index != aligned_offset) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ 
"packoffset() reservations with vector types cannot span multiple registers."); -+ } -+ } -+ var->buffer_offset = var->reg_reservation.offset_index; -+ } -+ else -+ { -+ var->buffer_offset = hlsl_type_get_sm4_offset(var->data_type, buffer->size); -+ } - -- var->buffer_offset = buffer->size; - TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); -- buffer->size += var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ buffer->size = max(buffer->size, var->buffer_offset + var_reg_size); - if (var->last_read) -- buffer->used_size = buffer->size; -+ buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size); -+} -+ -+static void validate_buffer_offsets(struct hlsl_ctx *ctx) -+{ -+ struct hlsl_ir_var *var1, *var2; -+ struct hlsl_buffer *buffer; -+ -+ LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (!var1->is_uniform || hlsl_type_is_resource(var1->data_type)) -+ continue; -+ -+ buffer = var1->buffer; -+ if (!buffer->used_size) -+ continue; -+ -+ LIST_FOR_EACH_ENTRY(var2, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ unsigned int var1_reg_size, var2_reg_size; -+ -+ if (!var2->is_uniform || hlsl_type_is_resource(var2->data_type)) -+ continue; -+ -+ if (var1 == var2 || var1->buffer != var2->buffer) -+ continue; -+ -+ /* This is to avoid reporting the error twice for the same pair of overlapping variables. 
*/ -+ if (strcmp(var1->name, var2->name) >= 0) -+ continue; -+ -+ var1_reg_size = var1->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ var2_reg_size = var2->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ -+ if (var1->buffer_offset < var2->buffer_offset + var2_reg_size -+ && var2->buffer_offset < var1->buffer_offset + var1_reg_size) -+ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid packoffset() reservation: Variables %s and %s overlap.", -+ var1->name, var2->name); -+ } -+ } -+ -+ LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ buffer = var1->buffer; -+ if (!buffer || buffer == ctx->globals_buffer) -+ continue; -+ -+ if (var1->reg_reservation.offset_type -+ || (var1->data_type->class == HLSL_CLASS_OBJECT && var1->reg_reservation.reg_type)) -+ buffer->manually_packed_elements = true; -+ else -+ buffer->automatically_packed_elements = true; -+ -+ if (buffer->manually_packed_elements && buffer->automatically_packed_elements) -+ { -+ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "packoffset() must be specified for all the buffer elements, or none of them."); -+ break; -+ } -+ } - } - - static void allocate_buffers(struct hlsl_ctx *ctx) -@@ -2880,15 +3889,17 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (var->is_uniform && var->data_type->type != HLSL_CLASS_OBJECT) -+ if (var->is_uniform && !hlsl_type_is_resource(var->data_type)) - { - if (var->is_param) - var->buffer = ctx->params_buffer; - -- calculate_buffer_offset(var); -+ calculate_buffer_offset(ctx, var); - } - } - -+ validate_buffer_offsets(ctx); -+ - LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (!buffer->used_size) -@@ -2896,28 +3907,30 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - - if (buffer->type == HLSL_BUFFER_CONSTANT) - { -- if (buffer->reservation.type == 
'b') -+ if (buffer->reservation.reg_type == 'b') - { -- const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.index); -+ const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.reg_index); - - if (reserved_buffer && reserved_buffer != buffer) - { - hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, -- "Multiple buffers bound to cb%u.", buffer->reservation.index); -+ "Multiple buffers bound to cb%u.", buffer->reservation.reg_index); - hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, -- "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.index); -+ "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.reg_index); - } - -- buffer->reg.id = buffer->reservation.index; -+ buffer->reg.id = buffer->reservation.reg_index; -+ buffer->reg.allocation_size = 1; - buffer->reg.allocated = true; - TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); - } -- else if (!buffer->reservation.type) -+ else if (!buffer->reservation.reg_type) - { - while (get_reserved_buffer(ctx, index)) - ++index; - - buffer->reg.id = index; -+ buffer->reg.allocation_size = 1; - buffer->reg.allocated = true; - TRACE("Allocated %s to cb%u.\n", buffer->name, index); - ++index; -@@ -2936,16 +3949,35 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - } - - static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, -- uint32_t index) -+ uint32_t index, bool allocated_only) - { - const struct hlsl_ir_var *var; -+ unsigned int start, count; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) - { -- if (!var->regs[regset].allocated) -+ if (var->reg_reservation.reg_type == get_regset_name(regset) -+ && var->data_type->reg_size[regset]) -+ { -+ /* Vars with a reservation prevent non-reserved vars from being -+ * bound there even if the reserved vars aren't 
used. */ -+ start = var->reg_reservation.reg_index; -+ count = var->data_type->reg_size[regset]; -+ -+ if (!var->regs[regset].allocated && allocated_only) -+ continue; -+ } -+ else if (var->regs[regset].allocated) -+ { -+ start = var->regs[regset].id; -+ count = var->regs[regset].allocation_size; -+ } -+ else -+ { - continue; -+ } - -- if (index == var->regs[regset].id) -+ if (start <= index && index < start + count) - return var; - } - return NULL; -@@ -2956,7 +3988,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - char regset_name = get_regset_name(regset); - struct hlsl_ir_var *var; - uint32_t min_index = 0; -- uint32_t index; - - if (regset == HLSL_REGSET_UAVS) - { -@@ -2968,19 +3999,18 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - } - } - -- index = min_index; -- - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (!var->last_read || !var->data_type->reg_size[regset]) -+ unsigned int count = var->regs[regset].allocation_size; -+ -+ if (count == 0) - continue; - -+ /* The variable was already allocated if it has a reservation. 
*/ - if (var->regs[regset].allocated) - { -- const struct hlsl_ir_var *reserved_object; -- unsigned int index = var->regs[regset].id; -- -- reserved_object = get_allocated_object(ctx, regset, index); -+ const struct hlsl_ir_var *reserved_object, *last_reported = NULL; -+ unsigned int index, i; - - if (var->regs[regset].id < min_index) - { -@@ -2988,28 +4018,47 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "UAV index (%u) must be higher than the maximum render target index (%u).", - var->regs[regset].id, min_index - 1); -+ continue; - } -- else if (reserved_object && reserved_object != var) -+ -+ for (i = 0; i < count; ++i) - { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, -- "Multiple objects bound to %c%u.", regset_name, index); -- hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, -- "Object '%s' is already bound to %c%u.", reserved_object->name, -- regset_name, index); -- } -+ index = var->regs[regset].id + i; - -- var->regs[regset].id = var->reg_reservation.index; -- var->regs[regset].allocated = true; -- TRACE("Allocated reserved %s to %c%u.\n", var->name, regset_name, var->regs[regset].id); -+ /* get_allocated_object() may return "var" itself, but we -+ * actually want that, otherwise we'll end up reporting the -+ * same conflict between the same two variables twice. 
*/ -+ reserved_object = get_allocated_object(ctx, regset, index, true); -+ if (reserved_object && reserved_object != var && reserved_object != last_reported) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, -+ "Multiple variables bound to %c%u.", regset_name, index); -+ hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, -+ "Variable '%s' is already bound to %c%u.", reserved_object->name, -+ regset_name, index); -+ last_reported = reserved_object; -+ } -+ } - } - else - { -- while (get_allocated_object(ctx, regset, index)) -+ unsigned int index = min_index; -+ unsigned int available = 0; -+ -+ while (available < count) -+ { -+ if (get_allocated_object(ctx, regset, index, false)) -+ available = 0; -+ else -+ ++available; - ++index; -+ } -+ index -= count; - - var->regs[regset].id = index; - var->regs[regset].allocated = true; -- TRACE("Allocated object to %c%u.\n", regset_name, index); -+ TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, -+ index + count); - ++index; - } - } -@@ -3034,12 +4083,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - return false; - - /* We should always have generated a cast to UINT. 
*/ -- assert(path_node->data_type->type == HLSL_CLASS_SCALAR -+ assert(path_node->data_type->class == HLSL_CLASS_SCALAR - && path_node->data_type->base_type == HLSL_TYPE_UINT); - -- idx = hlsl_ir_constant(path_node)->value[0].u; -+ idx = hlsl_ir_constant(path_node)->value.u[0].u; - -- switch (type->type) -+ switch (type->class) - { - case HLSL_CLASS_VECTOR: - if (idx >= type->dimx) -@@ -3090,9 +4139,59 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - return true; - } - -+bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, -+ enum hlsl_regset regset, unsigned int *index) -+{ -+ struct hlsl_type *type = deref->var->data_type; -+ unsigned int i; -+ -+ assert(regset <= HLSL_REGSET_LAST_OBJECT); -+ -+ *index = 0; -+ -+ for (i = 0; i < deref->path_len; ++i) -+ { -+ struct hlsl_ir_node *path_node = deref->path[i].node; -+ unsigned int idx = 0; -+ -+ assert(path_node); -+ if (path_node->type != HLSL_IR_CONSTANT) -+ return false; -+ -+ /* We should always have generated a cast to UINT. 
*/ -+ assert(path_node->data_type->class == HLSL_CLASS_SCALAR -+ && path_node->data_type->base_type == HLSL_TYPE_UINT); -+ -+ idx = hlsl_ir_constant(path_node)->value.u[0].u; -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_ARRAY: -+ if (idx >= type->e.array.elements_count) -+ return false; -+ -+ *index += idx * type->e.array.type->reg_size[regset]; -+ break; -+ -+ case HLSL_CLASS_STRUCT: -+ *index += type->e.record.fields[idx].reg_offset[regset]; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ type = hlsl_get_element_type_from_path_index(ctx, type, path_node); -+ } -+ -+ assert(type->reg_size[regset] == 1); -+ return true; -+} -+ - bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) - { - struct hlsl_ir_node *offset_node = deref->offset.node; -+ enum hlsl_regset regset; - unsigned int size; - - if (!offset_node) -@@ -3102,15 +4201,16 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref - } - - /* We should always have generated a cast to UINT. 
*/ -- assert(offset_node->data_type->type == HLSL_CLASS_SCALAR -+ assert(offset_node->data_type->class == HLSL_CLASS_SCALAR - && offset_node->data_type->base_type == HLSL_TYPE_UINT); - - if (offset_node->type != HLSL_IR_CONSTANT) - return false; - -- *offset = hlsl_ir_constant(offset_node)->value[0].u; -+ *offset = hlsl_ir_constant(offset_node)->value.u[0].u; -+ regset = hlsl_type_get_regset(deref->data_type); - -- size = deref->var->data_type->reg_size[deref->offset_regset]; -+ size = deref->var->data_type->reg_size[regset]; - if (*offset >= size) - { - hlsl_error(ctx, &deref->offset.node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -@@ -3140,7 +4240,8 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - -- assert(deref->offset_regset == HLSL_REGSET_NUMERIC); -+ assert(deref->data_type); -+ assert(deref->data_type->class <= HLSL_CLASS_LAST_NUMERIC); - - ret.id += offset / 4; - -@@ -3170,7 +4271,7 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a - const struct hlsl_type *type = instr->data_type; - const struct hlsl_ir_constant *constant; - -- if (type->type != HLSL_CLASS_SCALAR -+ if (type->class != HLSL_CLASS_SCALAR - || (type->base_type != HLSL_TYPE_INT && type->base_type != HLSL_TYPE_UINT)) - { - struct vkd3d_string_buffer *string; -@@ -3190,15 +4291,34 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a - } - constant = hlsl_ir_constant(instr); - -- if ((type->base_type == HLSL_TYPE_INT && constant->value[0].i <= 0) -- || (type->base_type == HLSL_TYPE_UINT && !constant->value[0].u)) -+ if ((type->base_type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) -+ || (type->base_type == HLSL_TYPE_UINT && !constant->value.u[0].u)) - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, - "Thread count must be a positive 
integer."); - -- ctx->thread_count[i] = constant->value[0].u; -+ ctx->thread_count[i] = constant->value.u[0].u; - } - } - -+static bool type_has_object_components(struct hlsl_type *type) -+{ -+ if (type->class == HLSL_CLASS_OBJECT) -+ return true; -+ if (type->class == HLSL_CLASS_ARRAY) -+ return type_has_object_components(type->e.array.type); -+ if (type->class == HLSL_CLASS_STRUCT) -+ { -+ unsigned int i; -+ -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ if (type_has_object_components(type->e.record.fields[i].type)) -+ return true; -+ } -+ } -+ return false; -+} -+ - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) - { -@@ -3209,10 +4329,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - unsigned int i; - bool progress; - -- list_move_head(&body->instrs, &ctx->static_initializers); -+ list_move_head(&body->instrs, &ctx->static_initializers.instrs); - - memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); -- transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); -+ hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); - vkd3d_free(recursive_call_ctx.backtrace); - - /* Avoid going into an infinite loop when processing call instructions. 
-@@ -3222,41 +4342,51 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - - lower_return(ctx, entry_func, body, false); - -- while (transform_ir(ctx, lower_calls, body, NULL)); -+ while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); -+ -+ lower_ir(ctx, lower_matrix_swizzles, body); -+ hlsl_transform_ir(ctx, lower_index_loads, body, NULL); - - LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) - { - if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -- prepend_uniform_copy(ctx, &body->instrs, var); -+ prepend_uniform_copy(ctx, body, var); - } - - for (i = 0; i < entry_func->parameters.count; ++i) - { - var = entry_func->parameters.vars[i]; - -- if (var->data_type->type == HLSL_CLASS_OBJECT || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) - { -- prepend_uniform_copy(ctx, &body->instrs, var); -+ prepend_uniform_copy(ctx, body, var); - } - else - { -- if (var->data_type->type != HLSL_CLASS_STRUCT && !var->semantic.name) -+ if (type_has_object_components(var->data_type)) -+ hlsl_fixme(ctx, &var->loc, "Prepend uniform copies for object components within structs."); -+ -+ if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT -+ && !var->semantic.name) -+ { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Parameter \"%s\" is missing a semantic.", var->name); -+ var->semantic.reported_missing = true; -+ } - - if (var->storage_modifiers & HLSL_STORAGE_IN) -- prepend_input_var_copy(ctx, &body->instrs, var); -+ prepend_input_var_copy(ctx, body, var); - if (var->storage_modifiers & HLSL_STORAGE_OUT) -- append_output_var_copy(ctx, &body->instrs, var); -+ append_output_var_copy(ctx, body, var); - } - } - if (entry_func->return_var) - { -- if (entry_func->return_var->data_type->type != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) -+ if 
(entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); - -- append_output_var_copy(ctx, &body->instrs, entry_func->return_var); -+ append_output_var_copy(ctx, body, entry_func->return_var); - } - - for (i = 0; i < entry_func->attr_count; ++i) -@@ -3274,60 +4404,81 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, - "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); - -- transform_ir(ctx, lower_broadcasts, body, NULL); -- while (transform_ir(ctx, fold_redundant_casts, body, NULL)); -+ if (profile->major_version >= 4) -+ { -+ hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); -+ } -+ hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); -+ while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); - do - { -- progress = transform_ir(ctx, split_array_copies, body, NULL); -- progress |= transform_ir(ctx, split_struct_copies, body, NULL); -+ progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); -+ progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); - } - while (progress); -- transform_ir(ctx, split_matrix_copies, body, NULL); -- -- transform_ir(ctx, lower_narrowing_casts, body, NULL); -- transform_ir(ctx, lower_casts_to_bool, body, NULL); -- transform_ir(ctx, lower_int_division, body, NULL); -- transform_ir(ctx, lower_int_modulus, body, NULL); -- transform_ir(ctx, lower_int_abs, body, NULL); -- transform_ir(ctx, lower_float_modulus, body, NULL); -+ hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); -+ -+ hlsl_transform_ir(ctx, lower_narrowing_casts, body, NULL); -+ hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); -+ hlsl_transform_ir(ctx, lower_int_dot, body, 
NULL); -+ lower_ir(ctx, lower_int_division, body); -+ lower_ir(ctx, lower_int_modulus, body); -+ hlsl_transform_ir(ctx, lower_int_abs, body, NULL); -+ lower_ir(ctx, lower_float_modulus, body); -+ hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); - do - { -- progress = transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -- progress |= transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); -- progress |= copy_propagation_execute(ctx, body); -- progress |= transform_ir(ctx, fold_swizzle_chains, body, NULL); -- progress |= transform_ir(ctx, remove_trivial_swizzles, body, NULL); -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, body); -+ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); -+ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); - } - while (progress); - -+ hlsl_transform_ir(ctx, lower_nonconstant_vector_derefs, body, NULL); -+ hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); -+ hlsl_transform_ir(ctx, lower_int_dot, body, NULL); -+ -+ hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); -+ hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); -+ if (profile->major_version >= 4) -+ hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); -+ hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); -+ sort_synthetic_separated_samplers_first(ctx); -+ -+ if (profile->major_version >= 4) -+ hlsl_transform_ir(ctx, lower_ternary, body, NULL); - if (profile->major_version < 4) - { -- transform_ir(ctx, lower_division, body, NULL); -- transform_ir(ctx, lower_sqrt, body, NULL); -- transform_ir(ctx, lower_dot, body, NULL); -+ hlsl_transform_ir(ctx, lower_division, body, NULL); -+ hlsl_transform_ir(ctx, lower_sqrt, body, NULL); -+ hlsl_transform_ir(ctx, lower_dot, body, NULL); -+ hlsl_transform_ir(ctx, 
lower_round, body, NULL); - } - - if (profile->major_version < 2) - { -- transform_ir(ctx, lower_abs, body, NULL); -+ hlsl_transform_ir(ctx, lower_abs, body, NULL); - } - -- transform_ir(ctx, validate_static_object_references, body, NULL); -- - /* TODO: move forward, remove when no longer needed */ -- transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); -- while (transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); -+ transform_derefs(ctx, replace_deref_path_with_offset, body); -+ while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); - - do - compute_liveness(ctx, entry_func); -- while (transform_ir(ctx, dce, body, NULL)); -+ while (hlsl_transform_ir(ctx, dce, body, NULL)); - - compute_liveness(ctx, entry_func); - - if (TRACE_ON()) - rb_for_each_entry(&ctx->functions, dump_function, ctx); - -+ calculate_resource_register_counts(ctx); -+ - allocate_register_reservations(ctx); -+ - allocate_temp_registers(ctx, entry_func); - if (profile->major_version < 4) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 3210bbd5712..41a72ab6c0d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -22,7 +22,49 @@ - - #include "hlsl.h" - --static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) -+static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, -+ const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = fabsf(src->value.u[k].f); -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = fabs(src->value.u[k].d); -+ break; -+ -+ case HLSL_TYPE_INT: -+ /* C's 
abs(INT_MIN) is undefined, but HLSL evaluates this to INT_MIN */ -+ if (src->value.u[k].i == INT_MIN) -+ dst->u[k].i = INT_MIN; -+ else -+ dst->u[k].i = abs(src->value.u[k].i); -+ break; -+ -+ case HLSL_TYPE_UINT: -+ dst->u[k].u = src->value.u[k].u; -+ break; -+ -+ default: -+ FIXME("Fold abs() for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ return true; -+} -+ -+static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, -+ const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) - { - unsigned int k; - uint32_t u; -@@ -30,75 +72,75 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct - double d; - float f; - -- if (dst->node.data_type->dimx != src->node.data_type->dimx -- || dst->node.data_type->dimy != src->node.data_type->dimy) -+ if (dst_type->dimx != src->node.data_type->dimx -+ || dst_type->dimy != src->node.data_type->dimy) - { - FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type), -- debug_hlsl_type(ctx, dst->node.data_type)); -+ debug_hlsl_type(ctx, dst_type)); - return false; - } - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (src->node.data_type->base_type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -- u = src->value[k].f; -- i = src->value[k].f; -- f = src->value[k].f; -- d = src->value[k].f; -+ u = src->value.u[k].f; -+ i = src->value.u[k].f; -+ f = src->value.u[k].f; -+ d = src->value.u[k].f; - break; - - case HLSL_TYPE_DOUBLE: -- u = src->value[k].d; -- i = src->value[k].d; -- f = src->value[k].d; -- d = src->value[k].d; -+ u = src->value.u[k].d; -+ i = src->value.u[k].d; -+ f = src->value.u[k].d; -+ d = src->value.u[k].d; - break; - - case HLSL_TYPE_INT: -- u = src->value[k].i; -- i = src->value[k].i; -- f = src->value[k].i; -- d = src->value[k].i; -+ u = src->value.u[k].i; -+ i = src->value.u[k].i; -+ f = src->value.u[k].i; -+ d = src->value.u[k].i; - break; - - case HLSL_TYPE_UINT: -- u = 
src->value[k].u; -- i = src->value[k].u; -- f = src->value[k].u; -- d = src->value[k].u; -+ u = src->value.u[k].u; -+ i = src->value.u[k].u; -+ f = src->value.u[k].u; -+ d = src->value.u[k].u; - break; - - case HLSL_TYPE_BOOL: -- u = !!src->value[k].u; -- i = !!src->value[k].u; -- f = !!src->value[k].u; -- d = !!src->value[k].u; -+ u = !!src->value.u[k].u; -+ i = !!src->value.u[k].u; -+ f = !!src->value.u[k].u; -+ d = !!src->value.u[k].u; - break; - - default: - vkd3d_unreachable(); - } - -- switch (dst->node.data_type->base_type) -+ switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -- dst->value[k].f = f; -+ dst->u[k].f = f; - break; - - case HLSL_TYPE_DOUBLE: -- dst->value[k].d = d; -+ dst->u[k].d = d; - break; - - case HLSL_TYPE_INT: -- dst->value[k].i = i; -+ dst->u[k].i = i; - break; - - case HLSL_TYPE_UINT: -- dst->value[k].u = u; -+ dst->u[k].u = u; - break; - - case HLSL_TYPE_BOOL: -@@ -110,398 +152,728 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct - return true; - } - --static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) -+static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - - assert(type == src->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -- dst->value[k].f = -src->value[k].f; -+ if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, -+ "Indefinite logarithm result."); -+ } -+ dst->u[k].f = log2f(src->value.u[k].f); -+ if (ctx->profile->major_version < 4 && 
!isfinite(dst->u[k].f)) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, -+ "Infinities and NaNs are not allowed by the shader model."); -+ } - break; - - case HLSL_TYPE_DOUBLE: -- dst->value[k].d = -src->value[k].d; -+ if (src->value.u[k].d < 0.0) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, -+ "Indefinite logarithm result."); -+ } -+ dst->u[k].d = log2(src->value.u[k].d); -+ break; -+ -+ default: -+ FIXME("Fold 'log2' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ -+static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, -+ const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = -src->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = -src->value.u[k].d; - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->value[k].u = -src->value[k].u; -+ dst->u[k].u = -src->value.u[k].u; - break; - - default: -- FIXME("Fold negation for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold negation for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src1, -- struct hlsl_ir_constant *src2) -+static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == 
src2->node.data_type->base_type); -+ assert(type == src->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -- dst->value[k].f = src1->value[k].f + src2->value[k].f; -+ if (ctx->profile->major_version >= 4 && src->value.u[k].f == 0.0f) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, -+ "Floating point division by zero."); -+ } -+ dst->u[k].f = 1.0f / src->value.u[k].f; -+ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, -+ "Infinities and NaNs are not allowed by the shader model."); -+ } - break; - - case HLSL_TYPE_DOUBLE: -- dst->value[k].d = src1->value[k].d + src2->value[k].d; -+ if (src->value.u[k].d == 0.0) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, -+ "Floating point division by zero."); -+ } -+ dst->u[k].d = 1.0 / src->value.u[k].d; - break; - -- /* Handling HLSL_TYPE_INT through the unsigned field to avoid -- * undefined behavior with signed integers in C. 
*/ -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- dst->value[k].u = src1->value[k].u + src2->value[k].u; -+ default: -+ FIXME("Fold 'rcp' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ -+static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, -+ "Imaginary square root result."); -+ } -+ dst->u[k].f = sqrtf(src->value.u[k].f); -+ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, -+ "Infinities and NaNs are not allowed by the shader model."); -+ } -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ if (src->value.u[k].d < 0.0) -+ { -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, -+ "Imaginary square root result."); -+ } -+ dst->u[k].d = sqrt(src->value.u[k].d); - break; - - default: -- FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold 'sqrt' for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } -+ - return true; - } - --static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum 
hlsl_base_type type = dst_type->base_type; - unsigned int k; - - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -- dst->value[k].f = src1->value[k].f * src2->value[k].f; -+ dst->u[k].f = src1->value.u[k].f + src2->value.u[k].f; - break; - - case HLSL_TYPE_DOUBLE: -- dst->value[k].d = src1->value[k].d * src2->value[k].d; -+ dst->u[k].d = src1->value.u[k].d + src2->value.u[k].d; - break; - -+ /* Handling HLSL_TYPE_INT through the unsigned field to avoid -+ * undefined behavior with signed integers in C. */ - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->value[k].u = src1->value[k].u * src2->value[k].u; -+ dst->u[k].u = src1->value.u[k].u + src2->value.u[k].u; - break; - - default: -- FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - -- assert(dst->node.data_type->base_type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < 4; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { -- switch (src1->node.data_type->base_type) -+ switch (type) - { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- dst->value[k].u = src1->value[k].f != src2->value[k].f; -+ 
case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; - break; - -- case HLSL_TYPE_DOUBLE: -- dst->value[k].u = src1->value[k].d != src2->value[k].d; -- break; -+ default: -+ FIXME("Fold bit/logic and for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ return true; -+} -+ -+static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); - -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: -- dst->value[k].u = src1->value[k].u != src2->value[k].u; -+ dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; - break; - - default: -- vkd3d_unreachable(); -+ FIXME("Fold bit/logic or for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ return true; -+} -+ -+static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; -+ break; -+ -+ default: -+ FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; - } -+ } -+ return true; -+} - -- dst->value[k].u *= ~0u; -+static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type 
*dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); -+ assert(src1->node.data_type->dimx == src2->node.data_type->dimx); -+ -+ dst->u[0].f = 0.0f; -+ for (k = 0; k < src1->node.data_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; -+ break; -+ default: -+ FIXME("Fold 'dot' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } - } -+ - return true; - } - --static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); -+ assert(type == src3->node.data_type->base_type); -+ assert(src1->node.data_type->dimx == src2->node.data_type->dimx); -+ assert(src3->node.data_type->dimx == 1); - -- for (k = 0; k < dst->node.data_type->dimx; ++k) -+ dst->u[0].f = src3->value.u[0].f; -+ for (k = 0; k < src1->node.data_type->dimx; ++k) - { - switch (type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -- if (ctx->profile->major_version >= 4 && src2->value[k].f == 0) -+ dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; -+ break; -+ default: -+ FIXME("Fold 'dp2add' for type %s.\n", debug_hlsl_type(ctx, dst_type)); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ -+static bool fold_div(struct hlsl_ctx *ctx, 
struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, -+ const struct vkd3d_shader_location *loc) -+{ -+ enum hlsl_base_type type = dst_type->base_type; -+ unsigned int k; -+ -+ assert(type == src1->node.data_type->base_type); -+ assert(type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (ctx->profile->major_version >= 4 && src2->value.u[k].f == 0) - { -- hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, - "Floating point division by zero."); - } -- dst->value[k].f = src1->value[k].f / src2->value[k].f; -- if (ctx->profile->major_version < 4 && !isfinite(dst->value[k].f)) -+ dst->u[k].f = src1->value.u[k].f / src2->value.u[k].f; -+ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) - { -- hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, - "Infinities and NaNs are not allowed by the shader model."); - } - break; - - case HLSL_TYPE_DOUBLE: -- if (src2->value[k].d == 0) -+ if (src2->value.u[k].d == 0) - { -- hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, -+ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, - "Floating point division by zero."); - } -- dst->value[k].d = src1->value[k].d / src2->value[k].d; -+ dst->u[k].d = src1->value.u[k].d / src2->value.u[k].d; - break; - - case HLSL_TYPE_INT: -- if (src2->value[k].i == 0) -+ if (src2->value.u[k].i == 0) - { -- hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, - "Division by zero."); - return false; - } -- if (src1->value[k].i == INT_MIN && src2->value[k].i == -1) -- dst->value[k].i 
= INT_MIN; -+ if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) -+ dst->u[k].i = INT_MIN; - else -- dst->value[k].i = src1->value[k].i / src2->value[k].i; -+ dst->u[k].i = src1->value.u[k].i / src2->value.u[k].i; - break; - - case HLSL_TYPE_UINT: -- if (src2->value[k].u == 0) -+ if (src2->value.u[k].u == 0) - { -- hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, - "Division by zero."); - return false; - } -- dst->value[k].u = src1->value[k].u / src2->value[k].u; -+ dst->u[k].u = src1->value.u[k].u / src2->value.u[k].u; - break; - - default: -- FIXME("Fold division for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold division for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); - -- for (k = 0; k < dst->node.data_type->dimx; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { -- switch (type) -+ switch (src1->node.data_type->base_type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f == src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d == src2->value.u[k].d; -+ break; -+ - case HLSL_TYPE_INT: -- if (src2->value[k].i == 0) -- { -- hlsl_error(ctx, &dst->node.loc, 
VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, -- "Division by zero."); -- return false; -- } -- if (src1->value[k].i == INT_MIN && src2->value[k].i == -1) -- dst->value[k].i = 0; -- else -- dst->value[k].i = src1->value[k].i % src2->value[k].i; -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ dst->u[k].u *= ~0u; -+ } -+ return true; -+} -+ -+static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ unsigned int k; -+ -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (src1->node.data_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f >= src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d >= src2->value.u[k].d; -+ break; -+ -+ case HLSL_TYPE_INT: -+ dst->u[k].u = src1->value.u[k].i >= src2->value.u[k].i; - break; - - case HLSL_TYPE_UINT: -- if (src2->value[k].u == 0) -- { -- hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, -- "Division by zero."); -- return false; -- } -- dst->value[k].u = src1->value[k].u % src2->value[k].u; -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; - break; - - default: -- FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -- return false; -+ vkd3d_unreachable(); - } -+ -+ dst->u[k].u *= ~0u; - } - return true; - } - --static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const 
struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; - unsigned int k; - -- assert(type == src1->node.data_type->base_type); -- assert(type == src2->node.data_type->base_type); -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); - -- for (k = 0; k < dst->node.data_type->dimx; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { -- switch (type) -+ switch (src1->node.data_type->base_type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f < src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d < src2->value.u[k].d; -+ break; -+ - case HLSL_TYPE_INT: -- dst->value[k].i = max(src1->value[k].i, src2->value[k].i); -+ dst->u[k].u = src1->value.u[k].i < src2->value.u[k].i; - break; - - case HLSL_TYPE_UINT: -- dst->value[k].u = max(src1->value[k].u, src2->value[k].u); -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; - break; - - default: -- FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -- return false; -+ vkd3d_unreachable(); - } -+ -+ dst->u[k].u *= ~0u; - } - return true; - } - --static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < dst->node.data_type->dimx; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { -+ case HLSL_TYPE_FLOAT: -+ 
case HLSL_TYPE_HALF: -+ dst->u[k].f = fmaxf(src1->value.u[k].f, src2->value.u[k].f); -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = fmax(src1->value.u[k].d, src2->value.u[k].d); -+ break; -+ - case HLSL_TYPE_INT: -- dst->value[k].i = min(src1->value[k].i, src2->value[k].i); -+ dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); - break; - - case HLSL_TYPE_UINT: -- dst->value[k].u = min(src1->value[k].u, src2->value[k].u); -+ dst->u[k].u = max(src1->value.u[k].u, src2->value.u[k].u); - break; - - default: -- FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < dst->node.data_type->dimx; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = fminf(src1->value.u[k].f, src2->value.u[k].f); -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = fmin(src1->value.u[k].d, src2->value.u[k].d); -+ break; -+ - case HLSL_TYPE_INT: -+ dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); -+ break; -+ - case HLSL_TYPE_UINT: -- dst->value[k].u = src1->value[k].u ^ src2->value[k].u; -+ dst->u[k].u = min(src1->value.u[k].u, src2->value.u[k].u); - break; - - default: -- FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold 
min for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, -+ const struct vkd3d_shader_location *loc) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < dst->node.data_type->dimx; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { - case HLSL_TYPE_INT: -+ if (src2->value.u[k].i == 0) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -+ return false; -+ } -+ if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) -+ dst->u[k].i = 0; -+ else -+ dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; -+ break; -+ - case HLSL_TYPE_UINT: -- dst->value[k].u = src1->value[k].u & src2->value[k].u; -+ if (src2->value.u[k].u == 0) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); -+ return false; -+ } -+ dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; - break; - - default: -- FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - --static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, -- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) -+static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct 
hlsl_ir_constant *src2) - { -- enum hlsl_base_type type = dst->node.data_type->base_type; -+ enum hlsl_base_type type = dst_type->base_type; - unsigned int k; - - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); - -- for (k = 0; k < dst->node.data_type->dimx; ++k) -+ for (k = 0; k < dst_type->dimx; ++k) - { - switch (type) - { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; -+ break; -+ - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- dst->value[k].u = src1->value[k].u | src2->value[k].u; -+ dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; - break; - - default: -- FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); -+ FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; - } - } - return true; - } - -+static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, -+ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) -+{ -+ unsigned int k; -+ -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ assert(src1->node.data_type->base_type == src2->node.data_type->base_type); -+ -+ for (k = 0; k < dst_type->dimx; ++k) -+ { -+ switch (src1->node.data_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; -+ break; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ dst->u[k].u *= ~0u; -+ } -+ return true; -+} -+ - bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -- struct 
hlsl_ir_constant *arg1, *arg2 = NULL, *res; -+ struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; -+ struct hlsl_constant_value res = {0}; -+ struct hlsl_ir_node *res_node; - struct hlsl_ir_expr *expr; - unsigned int i; - bool success; -@@ -512,7 +884,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - if (!expr->operands[0].node) - return false; - -- if (instr->data_type->type > HLSL_CLASS_VECTOR) -+ if (instr->data_type->class > HLSL_CLASS_VECTOR) - return false; - - for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) -@@ -521,64 +893,101 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - { - if (expr->operands[i].node->type != HLSL_IR_CONSTANT) - return false; -- assert(expr->operands[i].node->data_type->type <= HLSL_CLASS_VECTOR); -+ assert(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR); - } - } - arg1 = hlsl_ir_constant(expr->operands[0].node); - if (expr->operands[1].node) - arg2 = hlsl_ir_constant(expr->operands[1].node); -- -- if (!(res = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) -- return false; -+ if (expr->operands[2].node) -+ arg3 = hlsl_ir_constant(expr->operands[2].node); - - switch (expr->op) - { -+ case HLSL_OP1_ABS: -+ success = fold_abs(ctx, &res, instr->data_type, arg1); -+ break; -+ - case HLSL_OP1_CAST: -- success = fold_cast(ctx, res, arg1); -+ success = fold_cast(ctx, &res, instr->data_type, arg1); -+ break; -+ -+ case HLSL_OP1_LOG2: -+ success = fold_log2(ctx, &res, instr->data_type, arg1, &instr->loc); - break; - - case HLSL_OP1_NEG: -- success = fold_neg(ctx, res, arg1); -+ success = fold_neg(ctx, &res, instr->data_type, arg1); -+ break; -+ -+ case HLSL_OP1_RCP: -+ success = fold_rcp(ctx, &res, instr->data_type, arg1, &instr->loc); -+ break; -+ -+ case HLSL_OP1_SQRT: -+ success = fold_sqrt(ctx, &res, instr->data_type, arg1, &instr->loc); - break; - - case HLSL_OP2_ADD: -- success = fold_add(ctx, res, arg1, arg2); -+ success = 
fold_add(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_MUL: -- success = fold_mul(ctx, res, arg1, arg2); -+ case HLSL_OP2_BIT_AND: -+ case HLSL_OP2_LOGIC_AND: -+ success = fold_and(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_NEQUAL: -- success = fold_nequal(ctx, res, arg1, arg2); -+ case HLSL_OP2_BIT_OR: -+ case HLSL_OP2_LOGIC_OR: -+ success = fold_or(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_BIT_XOR: -+ success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_DOT: -+ success = fold_dot(ctx, &res, instr->data_type, arg1, arg2); - break; - - case HLSL_OP2_DIV: -- success = fold_div(ctx, res, arg1, arg2); -+ success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); - break; - -- case HLSL_OP2_MOD: -- success = fold_mod(ctx, res, arg1, arg2); -+ case HLSL_OP2_EQUAL: -+ success = fold_equal(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_GEQUAL: -+ success = fold_gequal(ctx, &res, instr->data_type, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_LESS: -+ success = fold_less(ctx, &res, instr->data_type, arg1, arg2); - break; - - case HLSL_OP2_MAX: -- success = fold_max(ctx, res, arg1, arg2); -+ success = fold_max(ctx, &res, instr->data_type, arg1, arg2); - break; - - case HLSL_OP2_MIN: -- success = fold_min(ctx, res, arg1, arg2); -+ success = fold_min(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_BIT_XOR: -- success = fold_bit_xor(ctx, res, arg1, arg2); -+ case HLSL_OP2_MOD: -+ success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); - break; - -- case HLSL_OP2_BIT_AND: -- success = fold_bit_and(ctx, res, arg1, arg2); -+ case HLSL_OP2_MUL: -+ success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); - break; - -- case HLSL_OP2_BIT_OR: -- success = fold_bit_or(ctx, res, arg1, arg2); -+ case HLSL_OP2_NEQUAL: -+ success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); -+ 
break; -+ -+ case HLSL_OP3_DP2ADD: -+ success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); - break; - - default: -@@ -589,20 +998,20 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - - if (success) - { -- list_add_before(&expr->node.entry, &res->node.entry); -- hlsl_replace_node(&expr->node, &res->node); -- } -- else -- { -- vkd3d_free(res); -+ if (!(res_node = hlsl_new_constant(ctx, instr->data_type, &res, &instr->loc))) -+ return false; -+ list_add_before(&expr->node.entry, &res_node->entry); -+ hlsl_replace_node(&expr->node, res_node); - } - return success; - } - - bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { -- struct hlsl_ir_constant *value, *res; -+ struct hlsl_constant_value value; - struct hlsl_ir_swizzle *swizzle; -+ struct hlsl_ir_constant *src; -+ struct hlsl_ir_node *dst; - unsigned int i; - - if (instr->type != HLSL_IR_SWIZZLE) -@@ -610,15 +1019,15 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - swizzle = hlsl_ir_swizzle(instr); - if (swizzle->val.node->type != HLSL_IR_CONSTANT) - return false; -- value = hlsl_ir_constant(swizzle->val.node); -- -- if (!(res = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) -- return false; -+ src = hlsl_ir_constant(swizzle->val.node); - - for (i = 0; i < swizzle->node.data_type->dimx; ++i) -- res->value[i] = value->value[hlsl_swizzle_get_component(swizzle->swizzle, i)]; -+ value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->swizzle, i)]; -+ -+ if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc))) -+ return false; - -- list_add_before(&swizzle->node.entry, &res->node.entry); -- hlsl_replace_node(&swizzle->node, &res->node); -+ list_add_before(&swizzle->node.entry, &dst->entry); -+ hlsl_replace_node(&swizzle->node, dst); - return true; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c 
-deleted file mode 100644 -index 4a62d804ed6..00000000000 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c -+++ /dev/null -@@ -1,980 +0,0 @@ --/* -- * HLSL code generation for DXBC shader models 1-3 -- * -- * Copyright 2019-2020 Zebediah Figura for CodeWeavers -- * -- * This library is free software; you can redistribute it and/or -- * modify it under the terms of the GNU Lesser General Public -- * License as published by the Free Software Foundation; either -- * version 2.1 of the License, or (at your option) any later version. -- * -- * This library is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- * Lesser General Public License for more details. -- * -- * You should have received a copy of the GNU Lesser General Public -- * License along with this library; if not, write to the Free Software -- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -- */ -- --#include "hlsl.h" --#include -- --bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) --{ -- unsigned int i; -- -- static const struct -- { -- const char *semantic; -- bool output; -- enum vkd3d_shader_type shader_type; -- unsigned int major_version; -- D3DSHADER_PARAM_REGISTER_TYPE type; -- DWORD offset; -- } -- register_table[] = -- { -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, -- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, -- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, -- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, -- -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, -- {"depth", true, 
VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, -- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, -- {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, -- {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, -- -- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, -- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, -- -- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, -- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, -- }; -- -- for (i = 0; i < ARRAY_SIZE(register_table); ++i) -- { -- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) -- && output == register_table[i].output -- && ctx->profile->type == register_table[i].shader_type -- && ctx->profile->major_version == register_table[i].major_version) -- { -- *type = register_table[i].type; -- if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) -- *reg = register_table[i].offset; -- else -- *reg = semantic->index; -- return true; -- } -- } -- -- return false; --} -- --bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic 
*semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) --{ -- static const struct -- { -- const char *name; -- D3DDECLUSAGE usage; -- } -- semantics[] = -- { -- {"binormal", D3DDECLUSAGE_BINORMAL}, -- {"blendindices", D3DDECLUSAGE_BLENDINDICES}, -- {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, -- {"color", D3DDECLUSAGE_COLOR}, -- {"depth", D3DDECLUSAGE_DEPTH}, -- {"fog", D3DDECLUSAGE_FOG}, -- {"normal", D3DDECLUSAGE_NORMAL}, -- {"position", D3DDECLUSAGE_POSITION}, -- {"positiont", D3DDECLUSAGE_POSITIONT}, -- {"psize", D3DDECLUSAGE_PSIZE}, -- {"sample", D3DDECLUSAGE_SAMPLE}, -- {"sv_depth", D3DDECLUSAGE_DEPTH}, -- {"sv_position", D3DDECLUSAGE_POSITION}, -- {"sv_target", D3DDECLUSAGE_COLOR}, -- {"tangent", D3DDECLUSAGE_TANGENT}, -- {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, -- {"texcoord", D3DDECLUSAGE_TEXCOORD}, -- }; -- -- unsigned int i; -- -- for (i = 0; i < ARRAY_SIZE(semantics); ++i) -- { -- if (!ascii_strcasecmp(semantic->name, semantics[i].name)) -- { -- *usage = semantics[i].usage; -- *usage_idx = semantic->index; -- return true; -- } -- } -- -- return false; --} -- --static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) --{ -- if (type == VKD3D_SHADER_TYPE_VERTEX) -- return D3DVS_VERSION(major, minor); -- else -- return D3DPS_VERSION(major, minor); --} -- --static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) --{ -- switch (type->type) -- { -- case HLSL_CLASS_ARRAY: -- return sm1_class(type->e.array.type); -- case HLSL_CLASS_MATRIX: -- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) -- return D3DXPC_MATRIX_COLUMNS; -- else -- return D3DXPC_MATRIX_ROWS; -- case HLSL_CLASS_OBJECT: -- return D3DXPC_OBJECT; -- case HLSL_CLASS_SCALAR: -- return D3DXPC_SCALAR; -- case HLSL_CLASS_STRUCT: -- return D3DXPC_STRUCT; -- case HLSL_CLASS_VECTOR: -- return D3DXPC_VECTOR; -- default: -- ERR("Invalid class %#x.\n", type->type); -- vkd3d_unreachable(); -- } --} -- 
--static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) --{ -- switch (type->base_type) -- { -- case HLSL_TYPE_BOOL: -- return D3DXPT_BOOL; -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- return D3DXPT_FLOAT; -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- return D3DXPT_INT; -- case HLSL_TYPE_PIXELSHADER: -- return D3DXPT_PIXELSHADER; -- case HLSL_TYPE_SAMPLER: -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return D3DXPT_SAMPLER1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3DXPT_SAMPLER2D; -- case HLSL_SAMPLER_DIM_3D: -- return D3DXPT_SAMPLER3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return D3DXPT_SAMPLERCUBE; -- case HLSL_SAMPLER_DIM_GENERIC: -- return D3DXPT_SAMPLER; -- default: -- ERR("Invalid dimension %#x.\n", type->sampler_dim); -- vkd3d_unreachable(); -- } -- break; -- case HLSL_TYPE_STRING: -- return D3DXPT_STRING; -- case HLSL_TYPE_TEXTURE: -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return D3DXPT_TEXTURE1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3DXPT_TEXTURE2D; -- case HLSL_SAMPLER_DIM_3D: -- return D3DXPT_TEXTURE3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return D3DXPT_TEXTURECUBE; -- case HLSL_SAMPLER_DIM_GENERIC: -- return D3DXPT_TEXTURE; -- default: -- ERR("Invalid dimension %#x.\n", type->sampler_dim); -- vkd3d_unreachable(); -- } -- break; -- case HLSL_TYPE_VERTEXSHADER: -- return D3DXPT_VERTEXSHADER; -- case HLSL_TYPE_VOID: -- return D3DXPT_VOID; -- default: -- vkd3d_unreachable(); -- } --} -- --static const struct hlsl_type *get_array_type(const struct hlsl_type *type) --{ -- if (type->type == HLSL_CLASS_ARRAY) -- return get_array_type(type->e.array.type); -- return type; --} -- --static unsigned int get_array_size(const struct hlsl_type *type) --{ -- if (type->type == HLSL_CLASS_ARRAY) -- return get_array_size(type->e.array.type) * type->e.array.elements_count; -- return 1; --} -- --static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int 
ctab_start) --{ -- const struct hlsl_type *array_type = get_array_type(type); -- unsigned int array_size = get_array_size(type); -- unsigned int field_count = 0; -- size_t fields_offset = 0; -- size_t i; -- -- if (type->bytecode_offset) -- return; -- -- if (array_type->type == HLSL_CLASS_STRUCT) -- { -- field_count = array_type->e.record.field_count; -- -- for (i = 0; i < field_count; ++i) -- { -- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; -- -- field->name_bytecode_offset = put_string(buffer, field->name); -- write_sm1_type(buffer, field->type, ctab_start); -- } -- -- fields_offset = bytecode_get_size(buffer) - ctab_start; -- -- for (i = 0; i < field_count; ++i) -- { -- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; -- -- put_u32(buffer, field->name_bytecode_offset - ctab_start); -- put_u32(buffer, field->type->bytecode_offset - ctab_start); -- } -- } -- -- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); -- put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); -- put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -- put_u32(buffer, fields_offset); --} -- --static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) --{ -- struct hlsl_ir_var *var; -- -- list_remove(&to_sort->extern_entry); -- -- LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) -- { -- if (strcmp(to_sort->name, var->name) < 0) -- { -- list_add_before(&var->extern_entry, &to_sort->extern_entry); -- return; -- } -- } -- -- list_add_tail(sorted, &to_sort->extern_entry); --} -- --static void sm1_sort_externs(struct hlsl_ctx *ctx) --{ -- struct list sorted = LIST_INIT(sorted); -- struct hlsl_ir_var *var, *next; -- -- LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- sm1_sort_extern(&sorted, var); -- list_move_tail(&ctx->extern_vars, &sorted); --} -- --static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct 
vkd3d_bytecode_buffer *buffer, -- struct hlsl_ir_function_decl *entry_func) --{ -- size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; -- unsigned int uniform_count = 0; -- struct hlsl_ir_var *var; -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); -- -- if (!var->semantic.name && var->regs[regset].allocated) -- { -- ++uniform_count; -- -- if (var->is_param && var->is_uniform) -- { -- struct vkd3d_string_buffer *name; -- -- if (!(name = hlsl_get_string_buffer(ctx))) -- { -- buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; -- return; -- } -- vkd3d_string_buffer_printf(name, "$%s", var->name); -- vkd3d_free((char *)var->name); -- var->name = hlsl_strdup(ctx, name->buffer); -- hlsl_release_string_buffer(ctx, name); -- } -- } -- } -- -- sm1_sort_externs(ctx); -- -- size_offset = put_u32(buffer, 0); -- ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); -- -- ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); -- creator_offset = put_u32(buffer, 0); -- put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); -- put_u32(buffer, uniform_count); -- put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ -- put_u32(buffer, 0); /* FIXME: flags */ -- put_u32(buffer, 0); /* FIXME: target string */ -- -- vars_start = bytecode_get_size(buffer); -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); -- -- if (!var->semantic.name && var->regs[regset].allocated) -- { -- put_u32(buffer, 0); /* name */ -- if (var->data_type->type == HLSL_CLASS_OBJECT -- && (var->data_type->base_type == HLSL_TYPE_SAMPLER -- || var->data_type->base_type == HLSL_TYPE_TEXTURE)) -- { -- assert(regset == HLSL_REGSET_SAMPLERS); -- put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, 
var->regs[regset].id)); -- put_u32(buffer, 1); -- } -- else -- { -- assert(regset == HLSL_REGSET_NUMERIC); -- put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); -- put_u32(buffer, var->data_type->reg_size[regset] / 4); -- } -- put_u32(buffer, 0); /* type */ -- put_u32(buffer, 0); /* FIXME: default value */ -- } -- } -- -- uniform_count = 0; -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); -- -- if (!var->semantic.name && var->regs[regset].allocated) -- { -- size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); -- size_t name_offset; -- -- name_offset = put_string(buffer, var->name); -- set_u32(buffer, var_offset, name_offset - ctab_start); -- -- write_sm1_type(buffer, var->data_type, ctab_start); -- set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); -- ++uniform_count; -- } -- } -- -- offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); -- set_u32(buffer, creator_offset, offset - ctab_start); -- -- ctab_end = bytecode_get_size(buffer); -- set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); --} -- --static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) --{ -- return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) -- | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); --} -- --struct sm1_instruction --{ -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; -- -- struct sm1_dst_register -- { -- D3DSHADER_PARAM_REGISTER_TYPE type; -- D3DSHADER_PARAM_DSTMOD_TYPE mod; -- unsigned int writemask; -- uint32_t reg; -- } dst; -- -- struct sm1_src_register -- { -- D3DSHADER_PARAM_REGISTER_TYPE type; -- D3DSHADER_PARAM_SRCMOD_TYPE mod; -- unsigned int swizzle; -- uint32_t reg; -- } srcs[3]; -- unsigned int src_count; -- -- unsigned int has_dst; --}; -- --static void write_sm1_dst_register(struct 
vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) --{ -- assert(reg->writemask); -- put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); --} -- --static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, -- const struct sm1_src_register *reg) --{ -- put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); --} -- --static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct sm1_instruction *instr) --{ -- uint32_t token = instr->opcode; -- unsigned int i; -- -- if (ctx->profile->major_version > 1) -- token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; -- put_u32(buffer, token); -- -- if (instr->has_dst) -- write_sm1_dst_register(buffer, &instr->dst); -- -- for (i = 0; i < instr->src_count; ++i) -- write_sm1_src_register(buffer, &instr->srcs[i]); --}; -- --static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) --{ -- src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); --} -- --static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, -- const struct hlsl_reg *src3) --{ -- struct sm1_instruction instr = -- { -- .opcode = D3DSIO_DP2ADD, -- -- .dst.type = D3DSPR_TEMP, -- .dst.writemask = dst->writemask, -- .dst.reg = dst->id, -- .has_dst = 1, -- -- .srcs[0].type = D3DSPR_TEMP, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), -- .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -- .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), -- .srcs[1].reg = src2->id, -- .srcs[2].type = D3DSPR_TEMP, -- .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), -- .srcs[2].reg = src3->id, -- .src_count = 3, -- }; -- -- write_sm1_instruction(ctx, buffer, &instr); 
--} -- --static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- const struct hlsl_reg *src1, const struct hlsl_reg *src2) --{ -- struct sm1_instruction instr = -- { -- .opcode = opcode, -- -- .dst.type = D3DSPR_TEMP, -- .dst.writemask = dst->writemask, -- .dst.reg = dst->id, -- .has_dst = 1, -- -- .srcs[0].type = D3DSPR_TEMP, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), -- .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -- .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), -- .srcs[1].reg = src2->id, -- .src_count = 2, -- }; -- -- sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); -- sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &instr); --} -- --static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- const struct hlsl_reg *src1, const struct hlsl_reg *src2) --{ -- struct sm1_instruction instr = -- { -- .opcode = opcode, -- -- .dst.type = D3DSPR_TEMP, -- .dst.writemask = dst->writemask, -- .dst.reg = dst->id, -- .has_dst = 1, -- -- .srcs[0].type = D3DSPR_TEMP, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), -- .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -- .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), -- .srcs[1].reg = src2->id, -- .src_count = 2, -- }; -- -- write_sm1_instruction(ctx, buffer, &instr); --} -- --static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) --{ -- struct sm1_instruction instr = -- { -- .opcode = opcode, -- -- .dst.type = D3DSPR_TEMP, -- .dst.mod = 
dst_mod, -- .dst.writemask = dst->writemask, -- .dst.reg = dst->id, -- .has_dst = 1, -- -- .srcs[0].type = D3DSPR_TEMP, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), -- .srcs[0].reg = src->id, -- .srcs[0].mod = src_mod, -- .src_count = 1, -- }; -- -- sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &instr); --} -- --static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) --{ -- unsigned int i, x; -- -- for (i = 0; i < ctx->constant_defs.count; ++i) -- { -- uint32_t token = D3DSIO_DEF; -- const struct sm1_dst_register reg = -- { -- .type = D3DSPR_CONST, -- .writemask = VKD3DSP_WRITEMASK_ALL, -- .reg = i, -- }; -- -- if (ctx->profile->major_version > 1) -- token |= 5 << D3DSI_INSTLENGTH_SHIFT; -- put_u32(buffer, token); -- -- write_sm1_dst_register(buffer, ®); -- for (x = 0; x < 4; ++x) -- put_f32(buffer, ctx->constant_defs.values[i].f[x]); -- } --} -- --static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_var *var, bool output) --{ -- struct sm1_dst_register reg = {0}; -- uint32_t token, usage_idx; -- D3DDECLUSAGE usage; -- bool ret; -- -- if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) -- { -- usage = 0; -- usage_idx = 0; -- } -- else -- { -- ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); -- assert(ret); -- reg.type = output ? 
D3DSPR_OUTPUT : D3DSPR_INPUT; -- reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; -- } -- -- token = D3DSIO_DCL; -- if (ctx->profile->major_version > 1) -- token |= 2 << D3DSI_INSTLENGTH_SHIFT; -- put_u32(buffer, token); -- -- token = (1u << 31); -- token |= usage << D3DSP_DCL_USAGE_SHIFT; -- token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; -- put_u32(buffer, token); -- -- reg.writemask = (1 << var->data_type->dimx) - 1; -- write_sm1_dst_register(buffer, ®); --} -- --static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) --{ -- bool write_in = false, write_out = false; -- struct hlsl_ir_var *var; -- -- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -- write_in = true; -- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) -- write_in = write_out = true; -- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) -- write_in = true; -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if (write_in && var->is_input_semantic) -- write_sm1_semantic_dcl(ctx, buffer, var, false); -- if (write_out && var->is_output_semantic) -- write_sm1_semantic_dcl(ctx, buffer, var, true); -- } --} -- --static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) --{ -- const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); -- struct sm1_instruction sm1_instr = -- { -- .opcode = D3DSIO_MOV, -- -- .dst.type = D3DSPR_TEMP, -- .dst.reg = instr->reg.id, -- .dst.writemask = instr->reg.writemask, -- .has_dst = 1, -- -- .srcs[0].type = D3DSPR_CONST, -- .srcs[0].reg = constant->reg.id, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), -- .src_count = 1, -- }; -- -- assert(instr->reg.allocated); -- assert(constant->reg.allocated); -- sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, 
&sm1_instr); --} -- --static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) --{ -- struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); -- struct hlsl_ir_node *arg1 = expr->operands[0].node; -- unsigned int i; -- -- for (i = 0; i < instr->data_type->dimx; ++i) -- { -- struct hlsl_reg src = arg1->reg, dst = instr->reg; -- -- src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); -- dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); -- write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); -- } --} -- --static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) --{ -- struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); -- struct hlsl_ir_node *arg1 = expr->operands[0].node; -- struct hlsl_ir_node *arg2 = expr->operands[1].node; -- struct hlsl_ir_node *arg3 = expr->operands[2].node; -- -- assert(instr->reg.allocated); -- -- if (instr->data_type->base_type != HLSL_TYPE_FLOAT) -- { -- /* These need to be lowered. 
*/ -- hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); -- return; -- } -- -- switch (expr->op) -- { -- case HLSL_OP1_ABS: -- write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); -- break; -- -- case HLSL_OP1_EXP2: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); -- break; -- -- case HLSL_OP1_NEG: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); -- break; -- -- case HLSL_OP1_SAT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); -- break; -- -- case HLSL_OP1_RCP: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); -- break; -- -- case HLSL_OP1_RSQ: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); -- break; -- -- case HLSL_OP2_ADD: -- write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case HLSL_OP2_MAX: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case HLSL_OP2_MIN: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case HLSL_OP2_MUL: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case HLSL_OP1_FRACT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); -- break; -- -- case HLSL_OP2_DOT: -- switch (arg1->data_type->dimx) -- { -- case 4: -- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- case 3: -- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); -- break; -- -- default: -- vkd3d_unreachable(); -- } -- break; -- -- case HLSL_OP3_DP2ADD: -- write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -- break; -- -- default: -- hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); -- break; -- } --} -- --static 
void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) --{ -- const struct hlsl_ir_load *load = hlsl_ir_load(instr); -- const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); -- struct sm1_instruction sm1_instr = -- { -- .opcode = D3DSIO_MOV, -- -- .dst.type = D3DSPR_TEMP, -- .dst.reg = instr->reg.id, -- .dst.writemask = instr->reg.writemask, -- .has_dst = 1, -- -- .srcs[0].type = D3DSPR_TEMP, -- .srcs[0].reg = reg.id, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), -- .src_count = 1, -- }; -- -- assert(instr->reg.allocated); -- -- if (load->src.var->is_uniform) -- { -- assert(reg.allocated); -- sm1_instr.srcs[0].type = D3DSPR_CONST; -- } -- else if (load->src.var->is_input_semantic) -- { -- if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, -- false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) -- { -- assert(reg.allocated); -- sm1_instr.srcs[0].type = D3DSPR_INPUT; -- sm1_instr.srcs[0].reg = reg.id; -- } -- else -- sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); -- } -- -- sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); --} -- --static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) --{ -- const struct hlsl_ir_store *store = hlsl_ir_store(instr); -- const struct hlsl_ir_node *rhs = store->rhs.node; -- const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); -- struct sm1_instruction sm1_instr = -- { -- .opcode = D3DSIO_MOV, -- -- .dst.type = D3DSPR_TEMP, -- .dst.reg = reg.id, -- .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), -- .has_dst = 1, -- -- .srcs[0].type = D3DSPR_TEMP, -- .srcs[0].reg = rhs->reg.id, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), -- .src_count = 1, -- }; -- -- if 
(store->lhs.var->data_type->type == HLSL_CLASS_MATRIX) -- { -- FIXME("Matrix writemasks need to be lowered.\n"); -- return; -- } -- -- if (store->lhs.var->is_output_semantic) -- { -- if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, -- true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) -- { -- assert(reg.allocated); -- sm1_instr.dst.type = D3DSPR_OUTPUT; -- sm1_instr.dst.reg = reg.id; -- } -- else -- sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; -- } -- else -- assert(reg.allocated); -- -- sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); --} -- --static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) --{ -- const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); -- const struct hlsl_ir_node *val = swizzle->val.node; -- struct sm1_instruction sm1_instr = -- { -- .opcode = D3DSIO_MOV, -- -- .dst.type = D3DSPR_TEMP, -- .dst.reg = instr->reg.id, -- .dst.writemask = instr->reg.writemask, -- .has_dst = 1, -- -- .srcs[0].type = D3DSPR_TEMP, -- .srcs[0].reg = val->reg.id, -- .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), -- swizzle->swizzle, instr->data_type->dimx), -- .src_count = 1, -- }; -- -- assert(instr->reg.allocated); -- assert(val->reg.allocated); -- sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); --} -- --static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_function_decl *entry_func) --{ -- const struct hlsl_ir_node *instr; -- -- LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) -- { -- if (instr->data_type) -- { -- if (instr->data_type->type == HLSL_CLASS_MATRIX) -- { -- /* These need to be lowered. 
*/ -- hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); -- continue; -- } -- else if (instr->data_type->type == HLSL_CLASS_OBJECT) -- { -- hlsl_fixme(ctx, &instr->loc, "Object copy."); -- break; -- } -- -- assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR); -- } -- -- switch (instr->type) -- { -- case HLSL_IR_CALL: -- vkd3d_unreachable(); -- -- case HLSL_IR_CONSTANT: -- write_sm1_constant(ctx, buffer, instr); -- break; -- -- case HLSL_IR_EXPR: -- write_sm1_expr(ctx, buffer, instr); -- break; -- -- case HLSL_IR_LOAD: -- write_sm1_load(ctx, buffer, instr); -- break; -- -- case HLSL_IR_STORE: -- write_sm1_store(ctx, buffer, instr); -- break; -- -- case HLSL_IR_SWIZZLE: -- write_sm1_swizzle(ctx, buffer, instr); -- break; -- -- default: -- hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); -- } -- } --} -- --int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) --{ -- struct vkd3d_bytecode_buffer buffer = {0}; -- int ret; -- -- put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); -- -- write_sm1_uniforms(ctx, &buffer, entry_func); -- -- write_sm1_constant_defs(ctx, &buffer); -- write_sm1_semantic_dcls(ctx, &buffer); -- write_sm1_instructions(ctx, &buffer, entry_func); -- -- put_u32(&buffer, D3DSIO_END); -- -- if (!(ret = buffer.status)) -- { -- out->code = buffer.data; -- out->size = buffer.size; -- } -- return ret; --} -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c -deleted file mode 100644 -index 553a75818e7..00000000000 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c -+++ /dev/null -@@ -1,2531 +0,0 @@ --/* -- * HLSL code generation for DXBC shader models 4-5 -- * -- * Copyright 2019-2020 Zebediah Figura for CodeWeavers -- * -- * This library is free software; you can redistribute it and/or -- * modify it under 
the terms of the GNU Lesser General Public -- * License as published by the Free Software Foundation; either -- * version 2.1 of the License, or (at your option) any later version. -- * -- * This library is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- * Lesser General Public License for more details. -- * -- * You should have received a copy of the GNU Lesser General Public -- * License along with this library; if not, write to the Free Software -- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -- */ -- --#include "hlsl.h" --#include --#include "d3dcommon.h" --#include "sm4.h" -- --static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); -- --static bool type_is_integer(const struct hlsl_type *type) --{ -- switch (type->base_type) -- { -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- return true; -- -- default: -- return false; -- } --} -- --bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) --{ -- unsigned int i; -- -- static const struct -- { -- const char *semantic; -- bool output; -- enum vkd3d_shader_type shader_type; -- enum vkd3d_sm4_swizzle_type swizzle_type; -- enum vkd3d_sm4_register_type type; -- bool has_idx; -- } -- register_table[] = -- { -- {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, -- {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, -- {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, -- -- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, 
VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, -- -- /* Put sv_target in this table, instead of letting it fall through to -- * default varying allocation, so that the register index matches the -- * usage index. */ -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, -- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, -- }; -- -- for (i = 0; i < ARRAY_SIZE(register_table); ++i) -- { -- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) -- && output == register_table[i].output -- && ctx->profile->type == register_table[i].shader_type) -- { -- *type = register_table[i].type; -- if (swizzle_type) -- *swizzle_type = register_table[i].swizzle_type; -- *has_idx = register_table[i].has_idx; -- return true; -- } -- } -- -- return false; --} -- --bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, D3D_NAME *usage) --{ -- unsigned int i; -- -- static const struct -- { -- const char *name; -- bool output; -- enum vkd3d_shader_type shader_type; -- D3DDECLUSAGE usage; -- } -- semantics[] = -- { -- {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, -- {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, -- {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, -- -- {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, -- {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, -- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, -- -- {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, -- {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, -- {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, 
D3D_NAME_PRIMITIVE_ID}, -- -- {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, -- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, -- -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, -- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, -- -- {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, -- {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, -- -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, -- }; -- -- for (i = 0; i < ARRAY_SIZE(semantics); ++i) -- { -- if (!ascii_strcasecmp(semantic->name, semantics[i].name) -- && output == semantics[i].output -- && ctx->profile->type == semantics[i].shader_type -- && !ascii_strncasecmp(semantic->name, "sv_", 3)) -- { -- *usage = semantics[i].usage; -- return true; -- } -- } -- -- if (!ascii_strncasecmp(semantic->name, "sv_", 3)) -- return false; -- -- *usage = D3D_NAME_UNDEFINED; -- return true; --} -- --static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) --{ -- struct vkd3d_bytecode_buffer buffer = {0}; -- struct vkd3d_string_buffer *string; -- const struct hlsl_ir_var *var; -- size_t count_position; -- unsigned int i; -- bool ret; -- -- count_position = put_u32(&buffer, 0); -- put_u32(&buffer, 8); /* unknown */ -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; -- enum vkd3d_sm4_register_type type; -- uint32_t usage_idx, reg_idx; -- D3D_NAME usage; -- bool has_idx; -- -- if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) -- continue; -- -- ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -- assert(ret); -- if (usage 
== ~0u) -- continue; -- usage_idx = var->semantic.index; -- -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) -- { -- reg_idx = has_idx ? var->semantic.index : ~0u; -- } -- else -- { -- assert(var->regs[HLSL_REGSET_NUMERIC].allocated); -- type = VKD3D_SM4_RT_INPUT; -- reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; -- } -- -- use_mask = width; /* FIXME: accurately report use mask */ -- if (output) -- use_mask = 0xf ^ use_mask; -- -- /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). */ -- if (usage >= 64) -- usage = 0; -- -- put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, usage_idx); -- put_u32(&buffer, usage); -- switch (var->data_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); -- break; -- -- case HLSL_TYPE_INT: -- put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_UINT: -- put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); -- break; -- -- default: -- if ((string = hlsl_type_to_string(ctx, var->data_type))) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid data type %s for semantic variable %s.", string->buffer, var->name); -- hlsl_release_string_buffer(ctx, string); -- put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); -- } -- put_u32(&buffer, reg_idx); -- put_u32(&buffer, vkd3d_make_u16(width, use_mask)); -- } -- -- i = 0; -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- const char *semantic = var->semantic.name; -- size_t string_offset; -- D3D_NAME usage; -- -- if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) -- continue; -- -- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -- if (usage == ~0u) -- continue; -- -- if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) -- string_offset = put_string(&buffer, "SV_Target"); -- else if (usage == 
D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) -- string_offset = put_string(&buffer, "SV_Depth"); -- else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) -- string_offset = put_string(&buffer, "SV_Position"); -- else -- string_offset = put_string(&buffer, semantic); -- set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); -- } -- -- set_u32(&buffer, count_position, i); -- -- dxbc_writer_add_section(dxbc, output ? TAG_OSGN : TAG_ISGN, buffer.data, buffer.size); --} -- --static const struct hlsl_type *get_array_type(const struct hlsl_type *type) --{ -- if (type->type == HLSL_CLASS_ARRAY) -- return get_array_type(type->e.array.type); -- return type; --} -- --static unsigned int get_array_size(const struct hlsl_type *type) --{ -- if (type->type == HLSL_CLASS_ARRAY) -- return get_array_size(type->e.array.type) * type->e.array.elements_count; -- return 1; --} -- --static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) --{ -- switch (type->type) -- { -- case HLSL_CLASS_ARRAY: -- return sm4_class(type->e.array.type); -- case HLSL_CLASS_MATRIX: -- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) -- return D3D_SVC_MATRIX_COLUMNS; -- else -- return D3D_SVC_MATRIX_ROWS; -- case HLSL_CLASS_OBJECT: -- return D3D_SVC_OBJECT; -- case HLSL_CLASS_SCALAR: -- return D3D_SVC_SCALAR; -- case HLSL_CLASS_STRUCT: -- return D3D_SVC_STRUCT; -- case HLSL_CLASS_VECTOR: -- return D3D_SVC_VECTOR; -- default: -- ERR("Invalid class %#x.\n", type->type); -- vkd3d_unreachable(); -- } --} -- --static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) --{ -- switch (type->base_type) -- { -- case HLSL_TYPE_BOOL: -- return D3D_SVT_BOOL; -- case HLSL_TYPE_DOUBLE: -- return D3D_SVT_DOUBLE; -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- return D3D_SVT_FLOAT; -- case HLSL_TYPE_INT: -- return D3D_SVT_INT; -- case HLSL_TYPE_PIXELSHADER: -- return 
D3D_SVT_PIXELSHADER; -- case HLSL_TYPE_SAMPLER: -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return D3D_SVT_SAMPLER1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3D_SVT_SAMPLER2D; -- case HLSL_SAMPLER_DIM_3D: -- return D3D_SVT_SAMPLER3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return D3D_SVT_SAMPLERCUBE; -- case HLSL_SAMPLER_DIM_GENERIC: -- return D3D_SVT_SAMPLER; -- default: -- vkd3d_unreachable(); -- } -- break; -- case HLSL_TYPE_STRING: -- return D3D_SVT_STRING; -- case HLSL_TYPE_TEXTURE: -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return D3D_SVT_TEXTURE1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3D_SVT_TEXTURE2D; -- case HLSL_SAMPLER_DIM_3D: -- return D3D_SVT_TEXTURE3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return D3D_SVT_TEXTURECUBE; -- case HLSL_SAMPLER_DIM_GENERIC: -- return D3D_SVT_TEXTURE; -- default: -- vkd3d_unreachable(); -- } -- break; -- case HLSL_TYPE_UINT: -- return D3D_SVT_UINT; -- case HLSL_TYPE_VERTEXSHADER: -- return D3D_SVT_VERTEXSHADER; -- case HLSL_TYPE_VOID: -- return D3D_SVT_VOID; -- default: -- vkd3d_unreachable(); -- } --} -- --static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) --{ -- const struct hlsl_type *array_type = get_array_type(type); -- const char *name = array_type->name ? 
array_type->name : ""; -- const struct hlsl_profile_info *profile = ctx->profile; -- unsigned int field_count = 0, array_size = 0; -- size_t fields_offset = 0, name_offset = 0; -- size_t i; -- -- if (type->bytecode_offset) -- return; -- -- if (profile->major_version >= 5) -- name_offset = put_string(buffer, name); -- -- if (type->type == HLSL_CLASS_ARRAY) -- array_size = get_array_size(type); -- -- if (array_type->type == HLSL_CLASS_STRUCT) -- { -- field_count = array_type->e.record.field_count; -- -- for (i = 0; i < field_count; ++i) -- { -- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; -- -- field->name_bytecode_offset = put_string(buffer, field->name); -- write_sm4_type(ctx, buffer, field->type); -- } -- -- fields_offset = bytecode_get_size(buffer); -- -- for (i = 0; i < field_count; ++i) -- { -- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; -- -- put_u32(buffer, field->name_bytecode_offset); -- put_u32(buffer, field->type->bytecode_offset); -- put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); -- } -- } -- -- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); -- put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); -- put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -- put_u32(buffer, fields_offset); -- -- if (profile->major_version >= 5) -- { -- put_u32(buffer, 0); /* FIXME: unknown */ -- put_u32(buffer, 0); /* FIXME: unknown */ -- put_u32(buffer, 0); /* FIXME: unknown */ -- put_u32(buffer, 0); /* FIXME: unknown */ -- put_u32(buffer, name_offset); -- } --} -- --static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) --{ -- switch (type->base_type) -- { -- case HLSL_TYPE_SAMPLER: -- return D3D_SIT_SAMPLER; -- case HLSL_TYPE_TEXTURE: -- return D3D_SIT_TEXTURE; -- case HLSL_TYPE_UAV: -- return D3D_SIT_UAV_RWTYPED; -- default: -- vkd3d_unreachable(); -- } --} -- --static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type 
*type) --{ -- switch (type->e.resource_format->base_type) -- { -- case HLSL_TYPE_DOUBLE: -- return D3D_RETURN_TYPE_DOUBLE; -- -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- return D3D_RETURN_TYPE_FLOAT; -- -- case HLSL_TYPE_INT: -- return D3D_RETURN_TYPE_SINT; -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_UINT: -- return D3D_RETURN_TYPE_UINT; -- -- default: -- vkd3d_unreachable(); -- } --} -- --static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) --{ -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return D3D_SRV_DIMENSION_TEXTURE1D; -- case HLSL_SAMPLER_DIM_2D: -- return D3D_SRV_DIMENSION_TEXTURE2D; -- case HLSL_SAMPLER_DIM_3D: -- return D3D_SRV_DIMENSION_TEXTURE3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return D3D_SRV_DIMENSION_TEXTURECUBE; -- case HLSL_SAMPLER_DIM_1DARRAY: -- return D3D_SRV_DIMENSION_TEXTURE1DARRAY; -- case HLSL_SAMPLER_DIM_2DARRAY: -- return D3D_SRV_DIMENSION_TEXTURE2DARRAY; -- case HLSL_SAMPLER_DIM_2DMS: -- return D3D_SRV_DIMENSION_TEXTURE2DMS; -- case HLSL_SAMPLER_DIM_2DMSARRAY: -- return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; -- case HLSL_SAMPLER_DIM_CUBEARRAY: -- return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; -- default: -- vkd3d_unreachable(); -- } --} -- --static int sm4_compare_extern_resources(const void *a, const void *b) --{ -- const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; -- const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; -- enum hlsl_regset aa_regset, bb_regset; -- -- aa_regset = hlsl_type_get_regset(aa->data_type); -- bb_regset = hlsl_type_get_regset(bb->data_type); -- -- if (aa_regset != bb_regset) -- return aa_regset - bb_regset; -- -- return aa->regs[aa_regset].id - bb->regs[bb_regset].id; --} -- --static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) --{ -- const struct hlsl_ir_var **extern_resources = NULL; -- const struct hlsl_ir_var *var; -- enum hlsl_regset regset; -- size_t capacity = 0; -- -- 
*count = 0; -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if (!hlsl_type_is_resource(var->data_type)) -- continue; -- regset = hlsl_type_get_regset(var->data_type); -- if (!var->regs[regset].allocated) -- continue; -- -- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -- sizeof(*extern_resources)))) -- { -- *count = 0; -- return NULL; -- } -- -- extern_resources[*count] = var; -- ++*count; -- } -- -- qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); -- return extern_resources; --} -- --static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) --{ -- unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; -- size_t cbuffers_offset, resources_offset, creator_offset, string_offset; -- size_t cbuffer_position, resource_position, creator_position; -- const struct hlsl_profile_info *profile = ctx->profile; -- const struct hlsl_ir_var **extern_resources; -- struct vkd3d_bytecode_buffer buffer = {0}; -- const struct hlsl_buffer *cbuffer; -- const struct hlsl_ir_var *var; -- -- static const uint16_t target_types[] = -- { -- 0xffff, /* PIXEL */ -- 0xfffe, /* VERTEX */ -- 0x4753, /* GEOMETRY */ -- 0x4853, /* HULL */ -- 0x4453, /* DOMAIN */ -- 0x4353, /* COMPUTE */ -- }; -- -- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -- -- resource_count += extern_resources_count; -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- if (cbuffer->reg.allocated) -- { -- ++cbuffer_count; -- ++resource_count; -- } -- } -- -- put_u32(&buffer, cbuffer_count); -- cbuffer_position = put_u32(&buffer, 0); -- put_u32(&buffer, resource_count); -- resource_position = put_u32(&buffer, 0); -- put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), -- target_types[profile->type])); -- put_u32(&buffer, 0); /* FIXME: compilation flags */ -- 
creator_position = put_u32(&buffer, 0); -- -- if (profile->major_version >= 5) -- { -- put_u32(&buffer, TAG_RD11); -- put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ -- put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ -- put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ -- put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ -- put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ -- put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ -- put_u32(&buffer, 0); /* unknown; possibly a null terminator */ -- } -- -- /* Bound resources. */ -- -- resources_offset = bytecode_get_size(&buffer); -- set_u32(&buffer, resource_position, resources_offset); -- -- for (i = 0; i < extern_resources_count; ++i) -- { -- enum hlsl_regset regset; -- uint32_t flags = 0; -- -- var = extern_resources[i]; -- regset = hlsl_type_get_regset(var->data_type); -- -- if (var->reg_reservation.type) -- flags |= D3D_SIF_USERPACKED; -- -- put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, sm4_resource_type(var->data_type)); -- if (regset == HLSL_REGSET_SAMPLERS) -- { -- put_u32(&buffer, 0); -- put_u32(&buffer, 0); -- put_u32(&buffer, 0); -- } -- else -- { -- put_u32(&buffer, sm4_resource_format(var->data_type)); -- put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); -- put_u32(&buffer, ~0u); /* FIXME: multisample count */ -- flags |= (var->data_type->e.resource_format->dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; -- } -- put_u32(&buffer, var->regs[regset].id); -- put_u32(&buffer, 1); /* bind count */ -- put_u32(&buffer, flags); -- } -- -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- uint32_t flags = 0; -- -- if (!cbuffer->reg.allocated) -- continue; -- -- if (cbuffer->reservation.type) -- flags |= D3D_SIF_USERPACKED; -- -- put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); -- put_u32(&buffer, 0); /* return type */ -- put_u32(&buffer, 0); /* dimension */ -- put_u32(&buffer, 0); /* multisample count */ -- put_u32(&buffer, cbuffer->reg.id); /* bind point */ -- put_u32(&buffer, 1); /* bind count */ -- put_u32(&buffer, flags); /* flags */ -- } -- -- for (i = 0; i < extern_resources_count; ++i) -- { -- var = extern_resources[i]; -- -- string_offset = put_string(&buffer, var->name); -- set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); -- } -- -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- if (!cbuffer->reg.allocated) -- continue; -- -- string_offset = put_string(&buffer, cbuffer->name); -- set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); -- } -- -- /* Buffers. */ -- -- cbuffers_offset = bytecode_get_size(&buffer); -- set_u32(&buffer, cbuffer_position, cbuffers_offset); -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- unsigned int var_count = 0; -- -- if (!cbuffer->reg.allocated) -- continue; -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if (var->is_uniform && var->buffer == cbuffer) -- ++var_count; -- } -- -- put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, var_count); -- put_u32(&buffer, 0); /* variable offset */ -- put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); -- put_u32(&buffer, 0); /* FIXME: flags */ -- put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_CT_CBUFFER : D3D_CT_TBUFFER); -- } -- -- i = 0; -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- if (!cbuffer->reg.allocated) -- continue; -- -- string_offset = put_string(&buffer, cbuffer->name); -- set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); -- } -- -- i = 0; -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- size_t vars_start = bytecode_get_size(&buffer); -- -- if (!cbuffer->reg.allocated) -- continue; -- -- set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if (var->is_uniform && var->buffer == cbuffer) -- { -- uint32_t flags = 0; -- -- if (var->last_read) -- flags |= D3D_SVF_USED; -- -- put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, var->buffer_offset * sizeof(float)); -- put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); -- put_u32(&buffer, flags); -- put_u32(&buffer, 0); /* type */ -- put_u32(&buffer, 0); /* FIXME: default value */ -- -- if (profile->major_version >= 5) -- { -- put_u32(&buffer, 0); /* texture start */ -- put_u32(&buffer, 0); /* texture count */ -- put_u32(&buffer, 0); /* sampler start */ -- put_u32(&buffer, 0); /* sampler count */ -- } -- } -- } -- -- j = 0; -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if (var->is_uniform && var->buffer == cbuffer) -- { -- const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); -- size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); -- size_t string_offset = put_string(&buffer, var->name); -- -- set_u32(&buffer, var_offset, string_offset); -- write_sm4_type(ctx, &buffer, var->data_type); -- set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); -- ++j; -- } -- } -- } -- -- creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); -- set_u32(&buffer, creator_position, creator_offset); -- -- dxbc_writer_add_section(dxbc, TAG_RDEF, buffer.data, buffer.size); -- -- vkd3d_free(extern_resources); --} -- --static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) --{ -- switch (type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_1D: -- return VKD3D_SM4_RESOURCE_TEXTURE_1D; -- case HLSL_SAMPLER_DIM_2D: -- return VKD3D_SM4_RESOURCE_TEXTURE_2D; -- case HLSL_SAMPLER_DIM_3D: -- return VKD3D_SM4_RESOURCE_TEXTURE_3D; -- case HLSL_SAMPLER_DIM_CUBE: -- return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; -- case HLSL_SAMPLER_DIM_1DARRAY: -- return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; -- case HLSL_SAMPLER_DIM_2DARRAY: -- return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; -- case HLSL_SAMPLER_DIM_2DMS: -- return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; -- case HLSL_SAMPLER_DIM_2DMSARRAY: -- return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; -- case HLSL_SAMPLER_DIM_CUBEARRAY: -- return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; -- default: -- vkd3d_unreachable(); -- } --} -- --struct sm4_instruction_modifier --{ -- enum vkd3d_sm4_instruction_modifier type; -- -- union -- { -- struct -- { -- int u, v, w; -- } aoffimmi; -- } u; --}; -- --static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod) --{ -- uint32_t word = 0; -- -- word |= VKD3D_SM4_MODIFIER_MASK & imod->type; -- -- switch (imod->type) -- { -- case VKD3D_SM4_MODIFIER_AOFFIMMI: -- assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); -- assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); -- 
assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); -- word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; -- word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; -- word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; -- break; -- -- default: -- vkd3d_unreachable(); -- } -- -- return word; --} -- --struct sm4_register --{ -- enum vkd3d_sm4_register_type type; -- uint32_t idx[2]; -- unsigned int idx_count; -- enum vkd3d_sm4_dimension dim; -- uint32_t immconst_uint[4]; -- unsigned int mod; --}; -- --struct sm4_instruction --{ -- enum vkd3d_sm4_opcode opcode; -- -- struct sm4_instruction_modifier modifiers[1]; -- unsigned int modifier_count; -- -- struct sm4_dst_register -- { -- struct sm4_register reg; -- unsigned int writemask; -- } dsts[2]; -- unsigned int dst_count; -- -- struct sm4_src_register -- { -- struct sm4_register reg; -- enum vkd3d_sm4_swizzle_type swizzle_type; -- unsigned int swizzle; -- } srcs[4]; -- unsigned int src_count; -- -- uint32_t idx[3]; -- unsigned int idx_count; --}; -- --static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, -- unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, -- const struct hlsl_deref *deref, const struct hlsl_type *data_type) --{ -- const struct hlsl_ir_var *var = deref->var; -- -- if (var->is_uniform) -- { -- if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_TEXTURE) -- { -- reg->type = VKD3D_SM4_RT_RESOURCE; -- reg->dim = VKD3D_SM4_DIMENSION_VEC4; -- if (swizzle_type) -- *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; -- reg->idx_count = 1; -- *writemask = VKD3DSP_WRITEMASK_ALL; -- } -- else if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_UAV) -- { -- reg->type = VKD3D_SM5_RT_UAV; -- reg->dim = VKD3D_SM4_DIMENSION_VEC4; -- if (swizzle_type) -- *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- 
reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; -- reg->idx_count = 1; -- *writemask = VKD3DSP_WRITEMASK_ALL; -- } -- else if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_SAMPLER) -- { -- reg->type = VKD3D_SM4_RT_SAMPLER; -- reg->dim = VKD3D_SM4_DIMENSION_NONE; -- if (swizzle_type) -- *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -- reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; -- reg->idx_count = 1; -- *writemask = VKD3DSP_WRITEMASK_ALL; -- } -- else -- { -- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; -- -- assert(data_type->type <= HLSL_CLASS_VECTOR); -- reg->type = VKD3D_SM4_RT_CONSTBUFFER; -- reg->dim = VKD3D_SM4_DIMENSION_VEC4; -- if (swizzle_type) -- *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = var->buffer->reg.id; -- reg->idx[1] = offset / 4; -- reg->idx_count = 2; -- *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); -- } -- } -- else if (var->is_input_semantic) -- { -- bool has_idx; -- -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, ®->type, swizzle_type, &has_idx)) -- { -- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -- -- if (has_idx) -- { -- reg->idx[0] = var->semantic.index + offset / 4; -- reg->idx_count = 1; -- } -- -- reg->dim = VKD3D_SM4_DIMENSION_VEC4; -- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); -- } -- else -- { -- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); -- -- assert(hlsl_reg.allocated); -- reg->type = VKD3D_SM4_RT_INPUT; -- reg->dim = VKD3D_SM4_DIMENSION_VEC4; -- if (swizzle_type) -- *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = hlsl_reg.id; -- reg->idx_count = 1; -- *writemask = hlsl_reg.writemask; -- } -- } -- else if (var->is_output_semantic) -- { -- bool has_idx; -- -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, ®->type, swizzle_type, &has_idx)) -- { -- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -- -- if (has_idx) -- { -- 
reg->idx[0] = var->semantic.index + offset / 4; -- reg->idx_count = 1; -- } -- -- if (reg->type == VKD3D_SM4_RT_DEPTHOUT) -- reg->dim = VKD3D_SM4_DIMENSION_SCALAR; -- else -- reg->dim = VKD3D_SM4_DIMENSION_VEC4; -- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); -- } -- else -- { -- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); -- -- assert(hlsl_reg.allocated); -- reg->type = VKD3D_SM4_RT_OUTPUT; -- reg->dim = VKD3D_SM4_DIMENSION_VEC4; -- reg->idx[0] = hlsl_reg.id; -- reg->idx_count = 1; -- *writemask = hlsl_reg.writemask; -- } -- } -- else -- { -- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); -- -- assert(hlsl_reg.allocated); -- reg->type = VKD3D_SM4_RT_TEMP; -- reg->dim = VKD3D_SM4_DIMENSION_VEC4; -- if (swizzle_type) -- *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = hlsl_reg.id; -- reg->idx_count = 1; -- *writemask = hlsl_reg.writemask; -- } --} -- --static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, -- const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) --{ -- unsigned int writemask; -- -- sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); -- if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) -- src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); --} -- --static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask, -- enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) --{ -- assert(instr->reg.allocated); -- reg->type = VKD3D_SM4_RT_TEMP; -- reg->dim = VKD3D_SM4_DIMENSION_VEC4; -- *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -- reg->idx[0] = instr->reg.id; -- reg->idx_count = 1; -- *writemask = instr->reg.writemask; --} -- --static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct hlsl_ir_node *instr) --{ -- unsigned int swizzle_type; -- -- sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, 
instr); --} -- --static void sm4_src_from_node(struct sm4_src_register *src, -- const struct hlsl_ir_node *instr, unsigned int map_writemask) --{ -- unsigned int writemask; -- -- sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr); -- if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) -- src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); --} -- --static uint32_t sm4_encode_register(const struct sm4_register *reg) --{ -- return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) -- | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) -- | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); --} -- --static uint32_t sm4_register_order(const struct sm4_register *reg) --{ -- uint32_t order = 1; -- if (reg->type == VKD3D_SM4_RT_IMMCONST) -- order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 4 : 1; -- order += reg->idx_count; -- if (reg->mod) -- ++order; -- return order; --} -- --static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) --{ -- uint32_t token = instr->opcode; -- unsigned int size = 1, i, j; -- -- size += instr->modifier_count; -- for (i = 0; i < instr->dst_count; ++i) -- size += sm4_register_order(&instr->dsts[i].reg); -- for (i = 0; i < instr->src_count; ++i) -- size += sm4_register_order(&instr->srcs[i].reg); -- size += instr->idx_count; -- -- token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); -- -- if (instr->modifier_count > 0) -- token |= VKD3D_SM4_INSTRUCTION_MODIFIER; -- put_u32(buffer, token); -- -- for (i = 0; i < instr->modifier_count; ++i) -- { -- token = sm4_encode_instruction_modifier(&instr->modifiers[i]); -- if (instr->modifier_count > i + 1) -- token |= VKD3D_SM4_INSTRUCTION_MODIFIER; -- put_u32(buffer, token); -- } -- -- for (i = 0; i < instr->dst_count; ++i) -- { -- token = sm4_encode_register(&instr->dsts[i].reg); -- if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) -- token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; -- 
put_u32(buffer, token); -- -- for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) -- put_u32(buffer, instr->dsts[i].reg.idx[j]); -- } -- -- for (i = 0; i < instr->src_count; ++i) -- { -- token = sm4_encode_register(&instr->srcs[i].reg); -- token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -- token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; -- if (instr->srcs[i].reg.mod) -- token |= VKD3D_SM4_EXTENDED_OPERAND; -- put_u32(buffer, token); -- -- if (instr->srcs[i].reg.mod) -- put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) -- | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); -- -- for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) -- put_u32(buffer, instr->srcs[i].reg.idx[j]); -- -- if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) -- { -- put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); -- if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) -- { -- put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); -- put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); -- put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); -- } -- } -- } -- -- for (j = 0; j < instr->idx_count; ++j) -- put_u32(buffer, instr->idx[j]); --} -- --static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, -- const struct hlsl_ir_node *texel_offset) --{ -- struct sm4_instruction_modifier modif; -- struct hlsl_ir_constant *offset; -- -- if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) -- return false; -- offset = hlsl_ir_constant(texel_offset); -- -- modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; -- modif.u.aoffimmi.u = offset->value[0].i; -- modif.u.aoffimmi.v = offset->value[1].i; -- modif.u.aoffimmi.w = offset->value[2].i; -- if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 -- || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 -- || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) -- return false; -- -- instr->modifiers[instr->modifier_count++] = modif; -- return true; --} -- --static void 
write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) --{ -- const struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, -- -- .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, -- .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, -- .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, -- .srcs[0].reg.idx_count = 2, -- .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, -- .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), -- .src_count = 1, -- }; -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_dcl_sampler(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) --{ -- const struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM4_OP_DCL_SAMPLER, -- -- .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, -- .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id}, -- .dsts[0].reg.idx_count = 1, -- .dst_count = 1, -- }; -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_dcl_texture(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) --{ -- bool uav = (var->data_type->base_type == HLSL_TYPE_UAV); -- struct sm4_instruction instr = -- { -- .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE) -- | (sm4_resource_dimension(var->data_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT), -- -- .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, -- .dsts[0].reg.idx = {uav ? 
var->regs[HLSL_REGSET_UAVS].id : var->regs[HLSL_REGSET_TEXTURES].id}, -- .dsts[0].reg.idx_count = 1, -- .dst_count = 1, -- -- .idx[0] = sm4_resource_format(var->data_type) * 0x1111, -- .idx_count = 1, -- }; -- -- if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -- || var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) -- { -- instr.opcode |= var->data_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; -- } -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) --{ -- const struct hlsl_profile_info *profile = ctx->profile; -- const bool output = var->is_output_semantic; -- D3D_NAME usage; -- bool has_idx; -- -- struct sm4_instruction instr = -- { -- .dsts[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, -- .dst_count = 1, -- }; -- -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) -- { -- if (has_idx) -- { -- instr.dsts[0].reg.idx[0] = var->semantic.index; -- instr.dsts[0].reg.idx_count = 1; -- } -- else -- { -- instr.dsts[0].reg.idx_count = 0; -- } -- instr.dsts[0].writemask = (1 << var->data_type->dimx) - 1; -- } -- else -- { -- instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; -- instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; -- instr.dsts[0].reg.idx_count = 1; -- instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; -- } -- -- if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) -- instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; -- -- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -- if (usage == ~0u) -- usage = D3D_NAME_UNDEFINED; -- -- if (var->is_input_semantic) -- { -- switch (usage) -- { -- case D3D_NAME_UNDEFINED: -- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -- ? 
VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; -- break; -- -- case D3D_NAME_INSTANCE_ID: -- case D3D_NAME_PRIMITIVE_ID: -- case D3D_NAME_VERTEX_ID: -- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -- ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; -- break; -- -- default: -- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -- ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; -- break; -- } -- -- if (profile->type == VKD3D_SHADER_TYPE_PIXEL) -- { -- enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; -- -- if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) -- mode = VKD3DSIM_CONSTANT; -- -- instr.opcode |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; -- } -- } -- else -- { -- if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) -- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; -- else -- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; -- } -- -- switch (usage) -- { -- case D3D_NAME_COVERAGE: -- case D3D_NAME_DEPTH: -- case D3D_NAME_DEPTH_GREATER_EQUAL: -- case D3D_NAME_DEPTH_LESS_EQUAL: -- case D3D_NAME_TARGET: -- case D3D_NAME_UNDEFINED: -- break; -- -- default: -- instr.idx_count = 1; -- instr.idx[0] = usage; -- break; -- } -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) --{ -- struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM4_OP_DCL_TEMPS, -- -- .idx = {temp_count}, -- .idx_count = 1, -- }; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) --{ -- struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, -- -- .idx = {thread_count[0], thread_count[1], thread_count[2]}, -- .idx_count = 3, -- }; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) 
--{ -- struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM4_OP_RET, -- }; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(&instr.srcs[0], src, instr.dsts[0].writemask); -- instr.srcs[0].reg.mod = src_mod; -- instr.src_count = 1; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, -- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, -- const struct hlsl_ir_node *src) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- assert(dst_idx < ARRAY_SIZE(instr.dsts)); -- sm4_dst_from_node(&instr.dsts[dst_idx], dst); -- assert(1 - dst_idx >= 0); -- instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; -- instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; -- instr.dsts[1 - dst_idx].reg.idx_count = 0; -- instr.dst_count = 2; -- -- sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); -- instr.src_count = 1; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); -- sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); -- instr.src_count = 
2; -- -- write_sm4_instruction(buffer, &instr); --} -- --/* dp# instructions don't map the swizzle. */ --static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(&instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); -- instr.src_count = 2; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, -- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, -- const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- assert(dst_idx < ARRAY_SIZE(instr.dsts)); -- sm4_dst_from_node(&instr.dsts[dst_idx], dst); -- assert(1 - dst_idx >= 0); -- instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; -- instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; -- instr.dsts[1 - dst_idx].reg.idx_count = 0; -- instr.dst_count = 2; -- -- sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[dst_idx].writemask); -- sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); -- instr.src_count = 2; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_constant(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_constant *constant) --{ -- const unsigned int dimx = constant->node.data_type->dimx; -- struct sm4_instruction instr; -- struct sm4_register *reg = &instr.srcs[0].reg; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_MOV; -- -- sm4_dst_from_node(&instr.dsts[0], 
&constant->node); -- instr.dst_count = 1; -- -- instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -- reg->type = VKD3D_SM4_RT_IMMCONST; -- if (dimx == 1) -- { -- reg->dim = VKD3D_SM4_DIMENSION_SCALAR; -- reg->immconst_uint[0] = constant->value[0].u; -- } -- else -- { -- unsigned int i, j = 0; -- -- reg->dim = VKD3D_SM4_DIMENSION_VEC4; -- for (i = 0; i < 4; ++i) -- { -- if (instr.dsts[0].writemask & (1u << i)) -- reg->immconst_uint[i] = constant->value[j++].u; -- } -- } -- instr.src_count = 1, -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, -- const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, -- const struct hlsl_ir_node *texel_offset) --{ -- bool uav = (resource_type->base_type == HLSL_TYPE_UAV); -- struct sm4_instruction instr; -- unsigned int dim_count; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = uav ? VKD3D_SM5_OP_LD_UAV_TYPED : VKD3D_SM4_OP_LD; -- -- if (texel_offset) -- { -- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) -- { -- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -- "Offset must resolve to integer literal in the range -8 to 7."); -- return; -- } -- } -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -- -- if (!uav) -- { -- /* Mipmap level is in the last component in the IR, but needs to be in the W -- * component in the instruction. 
*/ -- dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); -- if (dim_count == 1) -- instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4); -- if (dim_count == 2) -- instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, Y, X, Z), 4); -- } -- -- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); -- -- instr.src_count = 2; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, -- const struct hlsl_deref *resource, const struct hlsl_deref *sampler, -- const struct hlsl_ir_node *coords, const struct hlsl_ir_node *texel_offset) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_SAMPLE; -- -- if (texel_offset) -- { -- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) -- { -- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -- "Offset must resolve to integer literal in the range -8 to 7."); -- return; -- } -- } -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); -- sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); -- instr.src_count = 3; -- -- write_sm4_instruction(buffer, &instr); --} -- --static bool type_is_float(const struct hlsl_type *type) --{ -- return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; --} -- --static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, -- const struct hlsl_ir_node *arg, uint32_t mask) --{ -- struct sm4_instruction instr; -- -- 
memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_AND; -- -- sm4_dst_from_node(&instr.dsts[0], &expr->node); -- instr.dst_count = 1; -- -- sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); -- instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -- instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; -- instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; -- instr.srcs[1].reg.immconst_uint[0] = mask; -- instr.src_count = 2; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_cast(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) --{ -- static const union -- { -- uint32_t u; -- float f; -- } one = { .f = 1.0 }; -- const struct hlsl_ir_node *arg1 = expr->operands[0].node; -- const struct hlsl_type *dst_type = expr->node.data_type; -- const struct hlsl_type *src_type = arg1->data_type; -- -- /* Narrowing casts were already lowered. */ -- assert(src_type->dimx == dst_type->dimx); -- -- switch (dst_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- switch (src_type->base_type) -- { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_UINT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); -- break; -- -- default: -- vkd3d_unreachable(); -- } -- break; -- -- case HLSL_TYPE_INT: -- switch (src_type->base_type) -- { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, 
arg1, 0); -- break; -- -- case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); -- break; -- -- default: -- vkd3d_unreachable(); -- } -- break; -- -- case HLSL_TYPE_UINT: -- switch (src_type->base_type) -- { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); -- break; -- -- default: -- vkd3d_unreachable(); -- } -- break; -- -- case HLSL_TYPE_HALF: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to half."); -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); -- break; -- -- case HLSL_TYPE_BOOL: -- /* Casts to bool should have already been lowered. 
*/ -- default: -- vkd3d_unreachable(); -- } --} -- --static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; -- -- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); -- instr.dst_count = 1; -- -- sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); -- instr.src_count = 2; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_expr(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) --{ -- const struct hlsl_ir_node *arg1 = expr->operands[0].node; -- const struct hlsl_ir_node *arg2 = expr->operands[1].node; -- const struct hlsl_type *dst_type = expr->node.data_type; -- struct vkd3d_string_buffer *dst_type_string; -- -- assert(expr->node.reg.allocated); -- -- if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) -- return; -- -- switch (expr->op) -- { -- case HLSL_OP1_ABS: -- switch (dst_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP1_BIT_NOT: -- assert(type_is_integer(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_CAST: -- write_sm4_cast(ctx, buffer, expr); -- break; -- -- case HLSL_OP1_COS: -- assert(type_is_float(dst_type)); -- write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); -- break; -- -- case HLSL_OP1_EXP2: -- 
assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_FLOOR: -- assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_FRACT: -- assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_LOG2: -- assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_LOGIC_NOT: -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_NEG: -- switch (dst_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP1_REINTERPRET: -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_ROUND: -- assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_RSQ: -- assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_SAT: -- assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV -- | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), -- &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_SIN: -- assert(type_is_float(dst_type)); -- write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); -- break; -- -- case HLSL_OP1_SQRT: -- 
assert(type_is_float(dst_type)); -- write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP2_ADD: -- switch (dst_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_BIT_AND: -- assert(type_is_integer(dst_type)); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_BIT_OR: -- assert(type_is_integer(dst_type)); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_BIT_XOR: -- assert(type_is_integer(dst_type)); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_DIV: -- switch (dst_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_UINT: -- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_DOT: -- switch (dst_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- switch (arg1->data_type->dimx) -- { -- case 4: -- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); -- break; -- -- case 3: -- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); -- break; -- -- case 2: -- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); -- break; -- -- case 1: -- default: -- vkd3d_unreachable(); -- } -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot 
expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_EQUAL: -- { -- const struct hlsl_type *src_type = arg1->data_type; -- -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- -- switch (src_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", -- debug_hlsl_type(ctx, src_type)); -- break; -- } -- break; -- } -- -- case HLSL_OP2_GEQUAL: -- { -- const struct hlsl_type *src_type = arg1->data_type; -- -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- -- switch (src_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", -- debug_hlsl_type(ctx, src_type)); -- break; -- } -- break; -- } -- -- case HLSL_OP2_LESS: -- { -- const struct hlsl_type *src_type = arg1->data_type; -- -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- -- switch (src_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", -- 
debug_hlsl_type(ctx, src_type)); -- break; -- } -- break; -- } -- -- case HLSL_OP2_LOGIC_AND: -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_LOGIC_OR: -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_LSHIFT: -- assert(type_is_integer(dst_type)); -- assert(dst_type->base_type != HLSL_TYPE_BOOL); -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_MAX: -- switch (dst_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_MIN: -- switch (dst_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_MOD: -- switch (dst_type->base_type) -- { -- case HLSL_TYPE_UINT: -- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_MUL: -- switch 
(dst_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- /* Using IMUL instead of UMUL because we're taking the low -- * bits, and the native compiler generates IMUL. */ -- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_NEQUAL: -- { -- const struct hlsl_type *src_type = arg1->data_type; -- -- assert(dst_type->base_type == HLSL_TYPE_BOOL); -- -- switch (src_type->base_type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", -- debug_hlsl_type(ctx, src_type)); -- break; -- } -- break; -- } -- -- case HLSL_OP2_RSHIFT: -- assert(type_is_integer(dst_type)); -- assert(dst_type->base_type != HLSL_TYPE_BOOL); -- write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, -- &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); -- } -- -- hlsl_release_string_buffer(ctx, dst_type_string); --} -- --static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) --{ -- struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, -- .src_count = 1, -- }; -- -- assert(iff->condition.node->data_type->dimx == 1); -- -- sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); -- write_sm4_instruction(buffer, &instr); -- -- write_sm4_block(ctx, buffer, &iff->then_instrs); -- -- if (!list_empty(&iff->else_instrs.instrs)) -- { -- instr.opcode = VKD3D_SM4_OP_ELSE; -- instr.src_count = 0; -- write_sm4_instruction(buffer, &instr); -- -- write_sm4_block(ctx, buffer, &iff->else_instrs); -- } -- -- instr.opcode = VKD3D_SM4_OP_ENDIF; -- instr.src_count = 0; -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_jump(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) --{ -- struct sm4_instruction instr = {0}; -- -- switch (jump->type) -- { -- case HLSL_IR_JUMP_BREAK: -- instr.opcode = VKD3D_SM4_OP_BREAK; -- break; -- -- case HLSL_IR_JUMP_RETURN: -- vkd3d_unreachable(); -- -- default: -- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); -- return; -- } -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_load(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_MOV; -- -- sm4_dst_from_node(&instr.dsts[0], &load->node); -- instr.dst_count = 1; -- -- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, load->node.data_type, instr.dsts[0].writemask); -- 
instr.src_count = 1; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_loop(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) --{ -- struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM4_OP_LOOP, -- }; -- -- write_sm4_instruction(buffer, &instr); -- -- write_sm4_block(ctx, buffer, &loop->body); -- -- instr.opcode = VKD3D_SM4_OP_ENDLOOP; -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, -- const struct hlsl_deref *resource, const struct hlsl_deref *sampler, -- const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) --{ -- struct sm4_src_register *src; -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- -- instr.opcode = VKD3D_SM4_OP_GATHER4; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); -- -- if (texel_offset) -- { -- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) -- { -- if (ctx->profile->major_version < 5) -- { -- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -- "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); -- return; -- } -- instr.opcode = VKD3D_SM5_OP_GATHER4_PO; -- sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); -- } -- } -- -- sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); -- -- src = &instr.srcs[instr.src_count++]; -- sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); -- src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; -- src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; -- src->swizzle = swizzle; -- -- write_sm4_instruction(buffer, &instr); 
--} -- --static void write_sm4_resource_load(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) --{ -- const struct hlsl_type *resource_type = load->resource.var->data_type; -- const struct hlsl_ir_node *texel_offset = load->texel_offset.node; -- const struct hlsl_ir_node *coords = load->coords.node; -- -- if (resource_type->type != HLSL_CLASS_OBJECT) -- { -- assert(resource_type->type == HLSL_CLASS_ARRAY || resource_type->type == HLSL_CLASS_STRUCT); -- hlsl_fixme(ctx, &load->node.loc, "Resource being a component of another variable."); -- return; -- } -- -- if (load->sampler.var) -- { -- const struct hlsl_type *sampler_type = load->sampler.var->data_type; -- -- if (sampler_type->type != HLSL_CLASS_OBJECT) -- { -- assert(sampler_type->type == HLSL_CLASS_ARRAY || sampler_type->type == HLSL_CLASS_STRUCT); -- hlsl_fixme(ctx, &load->node.loc, "Sampler being a component of another variable."); -- return; -- } -- assert(sampler_type->base_type == HLSL_TYPE_SAMPLER); -- assert(sampler_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC); -- -- if (!load->sampler.var->is_uniform) -- { -- hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); -- return; -- } -- } -- -- if (!load->resource.var->is_uniform) -- { -- hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); -- return; -- } -- -- switch (load->load_type) -- { -- case HLSL_RESOURCE_LOAD: -- write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, -- coords, texel_offset); -- break; -- -- case HLSL_RESOURCE_SAMPLE: -- if (!load->sampler.var) -- { -- hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); -- return; -- } -- write_sm4_sample(ctx, buffer, resource_type, &load->node, -- &load->resource, &load->sampler, coords, texel_offset); -- break; -- -- case HLSL_RESOURCE_GATHER_RED: -- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -- &load->sampler, coords, 
HLSL_SWIZZLE(X, X, X, X), texel_offset); -- break; -- -- case HLSL_RESOURCE_GATHER_GREEN: -- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -- &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); -- break; -- -- case HLSL_RESOURCE_GATHER_BLUE: -- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -- &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); -- break; -- -- case HLSL_RESOURCE_GATHER_ALPHA: -- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, -- &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); -- break; -- -- case HLSL_RESOURCE_SAMPLE_LOD: -- hlsl_fixme(ctx, &load->node.loc, "SM4 sample-LOD expression."); -- break; -- } --} -- --static void write_sm4_resource_store(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) --{ -- const struct hlsl_type *resource_type = store->resource.var->data_type; -- -- if (resource_type->type != HLSL_CLASS_OBJECT) -- { -- assert(resource_type->type == HLSL_CLASS_ARRAY || resource_type->type == HLSL_CLASS_STRUCT); -- hlsl_fixme(ctx, &store->node.loc, "Resource being a component of another variable."); -- return; -- } -- -- if (!store->resource.var->is_uniform) -- { -- hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); -- return; -- } -- -- write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); --} -- --static void write_sm4_store(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) --{ -- const struct hlsl_ir_node *rhs = store->rhs.node; -- struct sm4_instruction instr; -- unsigned int writemask; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_MOV; -- -- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); -- instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, 
store->writemask); -- instr.dst_count = 1; -- -- sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); -- instr.src_count = 1; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_swizzle(struct hlsl_ctx *ctx, -- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) --{ -- struct sm4_instruction instr; -- unsigned int writemask; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_MOV; -- -- sm4_dst_from_node(&instr.dsts[0], &swizzle->node); -- instr.dst_count = 1; -- -- sm4_register_from_node(&instr.srcs[0].reg, &writemask, &instr.srcs[0].swizzle_type, swizzle->val.node); -- instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), -- swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); -- instr.src_count = 1; -- -- write_sm4_instruction(buffer, &instr); --} -- --static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_block *block) --{ -- const struct hlsl_ir_node *instr; -- -- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -- { -- if (instr->data_type) -- { -- if (instr->data_type->type == HLSL_CLASS_MATRIX) -- { -- hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); -- break; -- } -- else if (instr->data_type->type == HLSL_CLASS_OBJECT) -- { -- hlsl_fixme(ctx, &instr->loc, "Object copy."); -- break; -- } -- -- assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR); -- } -- -- switch (instr->type) -- { -- case HLSL_IR_CALL: -- vkd3d_unreachable(); -- -- case HLSL_IR_CONSTANT: -- write_sm4_constant(ctx, buffer, hlsl_ir_constant(instr)); -- break; -- -- case HLSL_IR_EXPR: -- write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); -- break; -- -- case HLSL_IR_IF: -- write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); -- break; -- -- case HLSL_IR_JUMP: -- write_sm4_jump(ctx, buffer, 
hlsl_ir_jump(instr)); -- break; -- -- case HLSL_IR_LOAD: -- write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); -- break; -- -- case HLSL_IR_RESOURCE_LOAD: -- write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); -- break; -- -- case HLSL_IR_RESOURCE_STORE: -- write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); -- break; -- -- case HLSL_IR_LOOP: -- write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); -- break; -- -- case HLSL_IR_STORE: -- write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); -- break; -- -- case HLSL_IR_SWIZZLE: -- write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); -- break; -- -- default: -- hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); -- } -- } --} -- --static void write_sm4_shdr(struct hlsl_ctx *ctx, -- const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) --{ -- const struct hlsl_profile_info *profile = ctx->profile; -- const struct hlsl_ir_var **extern_resources; -- struct vkd3d_bytecode_buffer buffer = {0}; -- unsigned int extern_resources_count, i; -- const struct hlsl_buffer *cbuffer; -- const struct hlsl_ir_var *var; -- size_t token_count_position; -- -- static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = -- { -- VKD3D_SM4_PS, -- VKD3D_SM4_VS, -- VKD3D_SM4_GS, -- VKD3D_SM5_HS, -- VKD3D_SM5_DS, -- VKD3D_SM5_CS, -- 0, /* EFFECT */ -- 0, /* TEXTURE */ -- VKD3D_SM4_LIB, -- }; -- -- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -- -- put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); -- token_count_position = put_u32(&buffer, 0); -- -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- if (cbuffer->reg.allocated) -- write_sm4_dcl_constant_buffer(&buffer, cbuffer); -- } -- -- for (i = 0; i < extern_resources_count; ++i) -- { -- var = extern_resources[i]; -- -- if (var->data_type->base_type == HLSL_TYPE_SAMPLER) -- 
write_sm4_dcl_sampler(&buffer, var); -- else if (var->data_type->base_type == HLSL_TYPE_TEXTURE || var->data_type->base_type == HLSL_TYPE_UAV) -- write_sm4_dcl_texture(&buffer, var); -- } -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) -- write_sm4_dcl_semantic(ctx, &buffer, var); -- } -- -- if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) -- write_sm4_dcl_thread_group(&buffer, ctx->thread_count); -- -- if (ctx->temp_count) -- write_sm4_dcl_temps(&buffer, ctx->temp_count); -- -- write_sm4_block(ctx, &buffer, &entry_func->body); -- -- write_sm4_ret(&buffer); -- -- set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); -- -- dxbc_writer_add_section(dxbc, TAG_SHDR, buffer.data, buffer.size); -- -- vkd3d_free(extern_resources); --} -- --int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) --{ -- struct dxbc_writer dxbc; -- size_t i; -- int ret; -- -- dxbc_writer_init(&dxbc); -- -- write_sm4_signature(ctx, &dxbc, false); -- write_sm4_signature(ctx, &dxbc, true); -- write_sm4_rdef(ctx, &dxbc); -- write_sm4_shdr(ctx, entry_func, &dxbc); -- -- if (!(ret = ctx->result)) -- ret = dxbc_writer_write(&dxbc, out); -- for (i = 0; i < dxbc.section_count; ++i) -- vkd3d_shader_free_shader_code(&dxbc.sections[i].data); -- return ret; --} -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -new file mode 100644 -index 00000000000..d2bfb933edc ---- /dev/null -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -0,0 +1,1294 @@ -+/* -+ * Copyright 2023 Conor McCarthy for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) 
any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#include "vkd3d_shader_private.h" -+ -+static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) -+{ -+ return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; -+} -+ -+static bool shader_instruction_is_dcl(const struct vkd3d_shader_instruction *ins) -+{ -+ return (VKD3DSIH_DCL <= ins->handler_idx && ins->handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) -+ || ins->handler_idx == VKD3DSIH_HS_DECLS; -+} -+ -+static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) -+{ -+ ins->handler_idx = VKD3DSIH_NOP; -+ ins->dst_count = 0; -+ ins->src_count = 0; -+ ins->dst = NULL; -+ ins->src = NULL; -+} -+ -+static void shader_register_eliminate_phase_addressing(struct vkd3d_shader_register *reg, -+ unsigned int instance_id) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < reg->idx_count; ++i) -+ { -+ if (reg->idx[i].rel_addr && shader_register_is_phase_instance_id(®->idx[i].rel_addr->reg)) -+ { -+ reg->idx[i].rel_addr = NULL; -+ reg->idx[i].offset += instance_id; -+ } -+ } -+} -+ -+static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_instruction *ins, -+ unsigned int instance_id) -+{ -+ struct vkd3d_shader_register *reg; -+ unsigned int i; -+ -+ for (i = 0; i < ins->src_count; ++i) -+ { -+ reg = (struct vkd3d_shader_register *)&ins->src[i].reg; -+ if (shader_register_is_phase_instance_id(reg)) -+ { -+ reg->type = VKD3DSPR_IMMCONST; -+ reg->precision = 
VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; -+ reg->non_uniform = false; -+ reg->idx[0].offset = ~0u; -+ reg->idx[0].rel_addr = NULL; -+ reg->idx[1].offset = ~0u; -+ reg->idx[1].rel_addr = NULL; -+ reg->idx[2].offset = ~0u; -+ reg->idx[2].rel_addr = NULL; -+ reg->idx_count = 0; -+ reg->immconst_type = VKD3D_IMMCONST_SCALAR; -+ reg->u.immconst_uint[0] = instance_id; -+ continue; -+ } -+ shader_register_eliminate_phase_addressing(reg, instance_id); -+ } -+ -+ for (i = 0; i < ins->dst_count; ++i) -+ shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); -+} -+ -+static const struct vkd3d_shader_varying_map *find_varying_map( -+ const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < varying_map->varying_count; ++i) -+ { -+ if (varying_map->varying_map[i].output_signature_index == signature_idx) -+ return &varying_map->varying_map[i]; -+ } -+ -+ return NULL; -+} -+ -+static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_compile_info *compile_info) -+{ -+ struct shader_signature *signature = &parser->shader_desc.output_signature; -+ const struct vkd3d_shader_varying_map_info *varying_map; -+ unsigned int i; -+ -+ if (!(varying_map = vkd3d_find_struct(compile_info->next, VARYING_MAP_INFO))) -+ return VKD3D_OK; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ const struct vkd3d_shader_varying_map *map = find_varying_map(varying_map, i); -+ struct signature_element *e = &signature->elements[i]; -+ -+ if (map) -+ { -+ unsigned int input_mask = map->input_mask; -+ -+ e->target_location = map->input_register_index; -+ -+ /* It is illegal in Vulkan if the next shader uses the same varying -+ * location with a different mask. 
*/ -+ if (input_mask && input_mask != e->mask) -+ { -+ vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Aborting due to not yet implemented feature: " -+ "Output mask %#x does not match input mask %#x.", -+ e->mask, input_mask); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ } -+ else -+ { -+ e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; -+ } -+ } -+ -+ for (i = 0; i < varying_map->varying_count; ++i) -+ { -+ if (varying_map->varying_map[i].output_signature_index >= signature->element_count) -+ { -+ vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Aborting due to not yet implemented feature: " -+ "The next stage consumes varyings not written by this stage."); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+struct hull_flattener -+{ -+ struct vkd3d_shader_instruction_array instructions; -+ -+ unsigned int max_temp_count; -+ unsigned int temp_dcl_idx; -+ -+ unsigned int instance_count; -+ unsigned int phase_body_idx; -+ enum vkd3d_shader_opcode phase; -+}; -+ -+static bool flattener_is_in_fork_or_join_phase(const struct hull_flattener *flattener) -+{ -+ return flattener->phase == VKD3DSIH_HS_FORK_PHASE || flattener->phase == VKD3DSIH_HS_JOIN_PHASE; -+} -+ -+struct shader_phase_location -+{ -+ unsigned int index; -+ unsigned int instance_count; -+ unsigned int instruction_count; -+}; -+ -+struct shader_phase_location_array -+{ -+ /* Unlikely worst case: one phase for each component of each output register. 
*/ -+ struct shader_phase_location locations[MAX_REG_OUTPUT * VKD3D_VEC4_SIZE]; -+ unsigned int count; -+}; -+ -+static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normaliser, -+ unsigned int index, struct shader_phase_location_array *locations) -+{ -+ struct vkd3d_shader_instruction *ins = &normaliser->instructions.elements[index]; -+ struct shader_phase_location *loc; -+ bool b; -+ -+ if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) -+ { -+ b = flattener_is_in_fork_or_join_phase(normaliser); -+ /* Reset the phase info. */ -+ normaliser->phase_body_idx = ~0u; -+ normaliser->phase = ins->handler_idx; -+ normaliser->instance_count = 1; -+ /* Leave the first occurrence and delete the rest. */ -+ if (b) -+ vkd3d_shader_instruction_make_nop(ins); -+ return; -+ } -+ else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT -+ || ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) -+ { -+ normaliser->instance_count = ins->declaration.count + !ins->declaration.count; -+ vkd3d_shader_instruction_make_nop(ins); -+ return; -+ } -+ else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( -+ &ins->declaration.dst.reg)) -+ { -+ vkd3d_shader_instruction_make_nop(ins); -+ return; -+ } -+ else if (ins->handler_idx == VKD3DSIH_DCL_TEMPS && normaliser->phase != VKD3DSIH_INVALID) -+ { -+ /* Leave only the first temp declaration and set it to the max count later. 
*/ -+ if (!normaliser->max_temp_count) -+ normaliser->temp_dcl_idx = index; -+ else -+ vkd3d_shader_instruction_make_nop(ins); -+ normaliser->max_temp_count = max(normaliser->max_temp_count, ins->declaration.count); -+ return; -+ } -+ -+ if (normaliser->phase == VKD3DSIH_INVALID || shader_instruction_is_dcl(ins)) -+ return; -+ -+ if (normaliser->phase_body_idx == ~0u) -+ normaliser->phase_body_idx = index; -+ -+ if (ins->handler_idx == VKD3DSIH_RET) -+ { -+ vkd3d_shader_instruction_make_nop(ins); -+ if (locations->count >= ARRAY_SIZE(locations->locations)) -+ { -+ FIXME("Insufficient space for phase location.\n"); -+ return; -+ } -+ loc = &locations->locations[locations->count++]; -+ loc->index = normaliser->phase_body_idx; -+ loc->instance_count = normaliser->instance_count; -+ loc->instruction_count = index - normaliser->phase_body_idx; -+ } -+} -+ -+static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normaliser, -+ struct shader_phase_location_array *locations) -+{ -+ struct shader_phase_location *loc; -+ unsigned int i, j, k, end, count; -+ -+ for (i = 0, count = 0; i < locations->count; ++i) -+ count += (locations->locations[i].instance_count - 1) * locations->locations[i].instruction_count; -+ -+ if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ end = normaliser->instructions.count; -+ normaliser->instructions.count += count; -+ -+ for (i = locations->count; i > 0; --i) -+ { -+ loc = &locations->locations[i - 1]; -+ j = loc->index + loc->instruction_count; -+ memmove(&normaliser->instructions.elements[j + count], &normaliser->instructions.elements[j], -+ (end - j) * sizeof(*normaliser->instructions.elements)); -+ end = j; -+ count -= (loc->instance_count - 1) * loc->instruction_count; -+ loc->index += count; -+ } -+ -+ for (i = 0, count = 0; i < locations->count; ++i) -+ { -+ loc = &locations->locations[i]; -+ /* Make a copy of the non-dcl 
instructions for each instance. */ -+ for (j = 1; j < loc->instance_count; ++j) -+ { -+ for (k = 0; k < loc->instruction_count; ++k) -+ { -+ if (!shader_instruction_array_clone_instruction(&normaliser->instructions, -+ loc->index + loc->instruction_count * j + k, loc->index + k)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ } -+ /* Replace each reference to the instance id with a constant instance id. */ -+ for (j = 0; j < loc->instance_count; ++j) -+ { -+ for (k = 0; k < loc->instruction_count; ++k) -+ shader_instruction_eliminate_phase_instance_id( -+ &normaliser->instructions.elements[loc->index + loc->instruction_count * j + k], j); -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, -+ enum vkd3d_data_type data_type, unsigned int idx_count) -+{ -+ reg->type = reg_type; -+ reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; -+ reg->non_uniform = false; -+ reg->data_type = data_type; -+ reg->idx[0].offset = ~0u; -+ reg->idx[0].rel_addr = NULL; -+ reg->idx[1].offset = ~0u; -+ reg->idx[1].rel_addr = NULL; -+ reg->idx[2].offset = ~0u; -+ reg->idx[2].rel_addr = NULL; -+ reg->idx_count = idx_count; -+ reg->immconst_type = VKD3D_IMMCONST_SCALAR; -+} -+ -+void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) -+{ -+ memset(ins, 0, sizeof(*ins)); -+ ins->handler_idx = handler_idx; -+} -+ -+static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) -+{ -+ struct hull_flattener flattener = {*src_instructions}; -+ struct vkd3d_shader_instruction_array *instructions; -+ struct shader_phase_location_array locations; -+ enum vkd3d_result result = VKD3D_OK; -+ unsigned int i; -+ -+ instructions = &flattener.instructions; -+ -+ flattener.phase = VKD3DSIH_INVALID; -+ for (i = 0, locations.count = 0; i < instructions->count; ++i) -+ 
flattener_eliminate_phase_related_dcls(&flattener, i, &locations); -+ -+ if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) -+ return result; -+ -+ if (flattener.phase != VKD3DSIH_INVALID) -+ { -+ if (flattener.temp_dcl_idx) -+ instructions->elements[flattener.temp_dcl_idx].declaration.count = flattener.max_temp_count; -+ -+ if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ shader_instruction_init(&instructions->elements[instructions->count++], VKD3DSIH_RET); -+ } -+ -+ *src_instructions = flattener.instructions; -+ return result; -+} -+ -+struct control_point_normaliser -+{ -+ struct vkd3d_shader_instruction_array instructions; -+ enum vkd3d_shader_opcode phase; -+ struct vkd3d_shader_src_param *outpointid_param; -+}; -+ -+static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser) -+{ -+ return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; -+} -+ -+static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( -+ struct vkd3d_shader_instruction_array *instructions) -+{ -+ struct vkd3d_shader_src_param *rel_addr; -+ -+ if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) -+ return NULL; -+ -+ shader_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0); -+ rel_addr->swizzle = 0; -+ rel_addr->modifiers = 0; -+ -+ return rel_addr; -+} -+ -+static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param, -+ struct control_point_normaliser *normaliser) -+{ -+ struct vkd3d_shader_register *reg = &dst_param->reg; -+ -+ if (control_point_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) -+ { -+ /* The TPF reader validates idx_count. */ -+ assert(reg->idx_count == 1); -+ reg->idx[1] = reg->idx[0]; -+ /* The control point id param is implicit here. 
Avoid later complications by inserting it. */ -+ reg->idx[0].offset = 0; -+ reg->idx[0].rel_addr = normaliser->outpointid_param; -+ ++reg->idx_count; -+ } -+} -+ -+static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e, -+ enum vkd3d_shader_register_type reg_type, unsigned int idx_count) -+{ -+ param->write_mask = e->mask; -+ param->modifiers = 0; -+ param->shift = 0; -+ shader_register_init(¶m->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); -+} -+ -+static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser, -+ const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst) -+{ -+ struct vkd3d_shader_instruction *ins; -+ struct vkd3d_shader_dst_param *param; -+ const struct signature_element *e; -+ unsigned int i, count; -+ -+ for (i = 0, count = 1; i < s->element_count; ++i) -+ count += !!s->elements[i].used_mask; -+ -+ if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ memmove(&normaliser->instructions.elements[dst + count], &normaliser->instructions.elements[dst], -+ (normaliser->instructions.count - dst) * sizeof(*normaliser->instructions.elements)); -+ normaliser->instructions.count += count; -+ -+ ins = &normaliser->instructions.elements[dst]; -+ shader_instruction_init(ins, VKD3DSIH_HS_CONTROL_POINT_PHASE); -+ ins->flags = 1; -+ ++ins; -+ -+ for (i = 0; i < s->element_count; ++i) -+ { -+ e = &s->elements[i]; -+ if (!e->used_mask) -+ continue; -+ -+ if (e->sysval_semantic != VKD3D_SHADER_SV_NONE) -+ { -+ shader_instruction_init(ins, VKD3DSIH_DCL_INPUT_SIV); -+ param = &ins->declaration.register_semantic.reg; -+ ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); -+ } -+ else -+ { -+ shader_instruction_init(ins, VKD3DSIH_DCL_INPUT); -+ param = 
&ins->declaration.dst; -+ } -+ -+ shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2); -+ param->reg.idx[0].offset = input_control_point_count; -+ param->reg.idx[1].offset = i; -+ -+ ++ins; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( -+ struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) -+{ -+ struct vkd3d_shader_instruction_array *instructions; -+ struct control_point_normaliser normaliser; -+ unsigned int input_control_point_count; -+ struct vkd3d_shader_instruction *ins; -+ enum vkd3d_result ret; -+ unsigned int i, j; -+ -+ if (!(normaliser.outpointid_param = instruction_array_create_outpointid_param(src_instructions))) -+ { -+ ERR("Failed to allocate src param.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ normaliser.instructions = *src_instructions; -+ instructions = &normaliser.instructions; -+ normaliser.phase = VKD3DSIH_INVALID; -+ -+ for (i = 0; i < normaliser.instructions.count; ++i) -+ { -+ ins = &instructions->elements[i]; -+ -+ switch (ins->handler_idx) -+ { -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ normaliser.phase = ins->handler_idx; -+ break; -+ default: -+ if (shader_instruction_is_dcl(ins)) -+ break; -+ for (j = 0; j < ins->dst_count; ++j) -+ shader_dst_param_normalise_outpointid((struct vkd3d_shader_dst_param *)&ins->dst[j], &normaliser); -+ break; -+ } -+ } -+ -+ normaliser.phase = VKD3DSIH_INVALID; -+ input_control_point_count = 1; -+ -+ for (i = 0; i < instructions->count; ++i) -+ { -+ ins = &instructions->elements[i]; -+ -+ switch (ins->handler_idx) -+ { -+ case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: -+ input_control_point_count = ins->declaration.count; -+ break; -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ *src_instructions = normaliser.instructions; -+ return VKD3D_OK; -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ ret = 
control_point_normaliser_emit_hs_input(&normaliser, input_signature, -+ input_control_point_count, i); -+ *src_instructions = normaliser.instructions; -+ return ret; -+ default: -+ break; -+ } -+ } -+ -+ *src_instructions = normaliser.instructions; -+ return VKD3D_OK; -+} -+ -+struct io_normaliser -+{ -+ struct vkd3d_shader_instruction_array instructions; -+ enum vkd3d_shader_type shader_type; -+ struct shader_signature *input_signature; -+ struct shader_signature *output_signature; -+ struct shader_signature *patch_constant_signature; -+ -+ unsigned int max_temp_count; -+ unsigned int temp_dcl_idx; -+ -+ unsigned int instance_count; -+ unsigned int phase_body_idx; -+ enum vkd3d_shader_opcode phase; -+ unsigned int output_control_point_count; -+ -+ struct vkd3d_shader_src_param *outpointid_param; -+ -+ struct vkd3d_shader_dst_param *input_dcl_params[MAX_REG_OUTPUT]; -+ struct vkd3d_shader_dst_param *output_dcl_params[MAX_REG_OUTPUT]; -+ struct vkd3d_shader_dst_param *pc_dcl_params[MAX_REG_OUTPUT]; -+ uint8_t input_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; -+ uint8_t output_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; -+ uint8_t pc_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; -+}; -+ -+static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *normaliser) -+{ -+ return normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE; -+} -+ -+static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser *normaliser) -+{ -+ return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; -+} -+ -+static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature, -+ unsigned int reg_idx, unsigned int write_mask) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ struct signature_element *e = &signature->elements[i]; -+ if (e->register_index <= reg_idx && e->register_index + e->register_count > reg_idx -+ && (e->mask & write_mask) == write_mask) 
-+ { -+ return i; -+ } -+ } -+ -+ /* Validated in the TPF reader. */ -+ vkd3d_unreachable(); -+} -+ -+static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE], -+ unsigned int register_idx, unsigned int write_mask) -+{ -+ return range_map[register_idx][vkd3d_write_mask_get_component_idx(write_mask)]; -+} -+ -+static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], unsigned int register_idx, -+ unsigned int register_count, unsigned int write_mask, bool is_dcl_indexrange) -+{ -+ unsigned int i, j, r, c, component_idx, component_count; -+ -+ assert(write_mask <= VKD3DSP_WRITEMASK_ALL); -+ component_idx = vkd3d_write_mask_get_component_idx(write_mask); -+ component_count = vkd3d_write_mask_component_count(write_mask); -+ -+ assert(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); -+ -+ if (range_map[register_idx][component_idx] > register_count && is_dcl_indexrange) -+ { -+ /* Validated in the TPF reader. */ -+ assert(range_map[register_idx][component_idx] != UINT8_MAX); -+ return; -+ } -+ if (range_map[register_idx][component_idx] == register_count) -+ { -+ /* Already done. This happens when fxc splits a register declaration by -+ * component(s). The dcl_indexrange instructions are split too. */ -+ return; -+ } -+ range_map[register_idx][component_idx] = register_count; -+ -+ for (i = 0; i < register_count; ++i) -+ { -+ r = register_idx + i; -+ for (j = !i; j < component_count; ++j) -+ { -+ c = component_idx + j; -+ /* A synthetic patch constant range which overlaps an existing range can start upstream of it -+ * for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap. -+ * The latter is validated in the TPF reader. 
*/ -+ assert(!range_map[r][c] || !is_dcl_indexrange); -+ range_map[r][c] = UINT8_MAX; -+ } -+ } -+} -+ -+static void io_normaliser_add_index_range(struct io_normaliser *normaliser, -+ const struct vkd3d_shader_instruction *ins) -+{ -+ const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; -+ const struct vkd3d_shader_register *reg = &range->dst.reg; -+ unsigned int reg_idx, write_mask, element_idx; -+ const struct shader_signature *signature; -+ uint8_t (*range_map)[VKD3D_VEC4_SIZE]; -+ -+ switch (reg->type) -+ { -+ case VKD3DSPR_INPUT: -+ case VKD3DSPR_INCONTROLPOINT: -+ range_map = normaliser->input_range_map; -+ signature = normaliser->input_signature; -+ break; -+ case VKD3DSPR_OUTCONTROLPOINT: -+ range_map = normaliser->output_range_map; -+ signature = normaliser->output_signature; -+ break; -+ case VKD3DSPR_OUTPUT: -+ if (!io_normaliser_is_in_fork_or_join_phase(normaliser)) -+ { -+ range_map = normaliser->output_range_map; -+ signature = normaliser->output_signature; -+ break; -+ } -+ /* fall through */ -+ case VKD3DSPR_PATCHCONST: -+ range_map = normaliser->pc_range_map; -+ signature = normaliser->patch_constant_signature; -+ break; -+ default: -+ /* Validated in the TPF reader. */ -+ vkd3d_unreachable(); -+ } -+ -+ reg_idx = reg->idx[reg->idx_count - 1].offset; -+ write_mask = range->dst.write_mask; -+ element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); -+ range_map_set_register_range(range_map, reg_idx, range->register_count, -+ signature->elements[element_idx].mask, true); -+} -+ -+static int signature_element_mask_compare(const void *a, const void *b) -+{ -+ const struct signature_element *e = a, *f = b; -+ int ret; -+ -+ return (ret = vkd3d_u32_compare(e->mask, f->mask)) ? 
ret : vkd3d_u32_compare(e->register_index, f->register_index); -+} -+ -+static bool sysval_semantics_should_merge(const struct signature_element *e, const struct signature_element *f) -+{ -+ if (e->sysval_semantic < VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE -+ || e->sysval_semantic > VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) -+ return false; -+ -+ return e->sysval_semantic == f->sysval_semantic -+ /* Line detail and density must be merged together to match the SPIR-V array. -+ * This deletes one of the two sysvals, but these are not used. */ -+ || (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET -+ && f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) -+ || (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN -+ && f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET); -+} -+ -+/* Merge tess factor sysvals because they are an array in SPIR-V. */ -+static void shader_signature_map_patch_constant_index_ranges(struct shader_signature *s, -+ uint8_t range_map[][VKD3D_VEC4_SIZE]) -+{ -+ struct signature_element *e, *f; -+ unsigned int i, j, register_count; -+ -+ qsort(s->elements, s->element_count, sizeof(s->elements[0]), signature_element_mask_compare); -+ -+ for (i = 0; i < s->element_count; i += register_count) -+ { -+ e = &s->elements[i]; -+ register_count = 1; -+ -+ if (!e->sysval_semantic) -+ continue; -+ -+ for (j = i + 1; j < s->element_count; ++j, ++register_count) -+ { -+ f = &s->elements[j]; -+ if (f->register_index != e->register_index + register_count || !sysval_semantics_should_merge(e, f)) -+ break; -+ } -+ if (register_count < 2) -+ continue; -+ -+ range_map_set_register_range(range_map, e->register_index, register_count, e->mask, false); -+ } -+} -+ -+static int signature_element_register_compare(const void *a, const void *b) -+{ -+ const struct signature_element *e = a, *f = b; -+ -+ return vkd3d_u32_compare(e->register_index, f->register_index); -+} -+ -+static int signature_element_index_compare(const void *a, const void *b) -+{ -+ 
const struct signature_element *e = a, *f = b; -+ -+ return vkd3d_u32_compare(e->sort_index, f->sort_index); -+} -+ -+static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map[][VKD3D_VEC4_SIZE], -+ bool is_patch_constant) -+{ -+ unsigned int i, j, element_count, new_count, register_count; -+ struct signature_element *elements; -+ struct signature_element *e, *f; -+ -+ element_count = s->element_count; -+ if (!(elements = vkd3d_malloc(element_count * sizeof(*elements)))) -+ return false; -+ memcpy(elements, s->elements, element_count * sizeof(*elements)); -+ -+ qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare); -+ -+ for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e) -+ { -+ e = &elements[i]; -+ j = i + 1; -+ -+ if (e->register_index == ~0u) -+ continue; -+ -+ /* Do not merge if the register index will be relative-addressed. */ -+ if (range_map_get_register_count(range_map, e->register_index, e->mask) > 1) -+ continue; -+ -+ for (; j < element_count; ++j) -+ { -+ f = &elements[j]; -+ -+ /* Merge different components of the same register unless sysvals are different, -+ * or it will be relative-addressed. 
*/ -+ if (f->register_index != e->register_index || f->sysval_semantic != e->sysval_semantic -+ || range_map_get_register_count(range_map, f->register_index, f->mask) > 1) -+ break; -+ -+ TRACE("Merging %s, reg %u, mask %#x, sysval %#x with %s, mask %#x, sysval %#x.\n", e->semantic_name, -+ e->register_index, e->mask, e->sysval_semantic, f->semantic_name, f->mask, f->sysval_semantic); -+ assert(!(e->mask & f->mask)); -+ -+ e->mask |= f->mask; -+ e->used_mask |= f->used_mask; -+ e->semantic_index = min(e->semantic_index, f->semantic_index); -+ } -+ } -+ element_count = new_count; -+ vkd3d_free(s->elements); -+ s->elements = elements; -+ s->element_count = element_count; -+ -+ if (is_patch_constant) -+ shader_signature_map_patch_constant_index_ranges(s, range_map); -+ -+ for (i = 0, new_count = 0; i < element_count; i += register_count, elements[new_count++] = *e) -+ { -+ e = &elements[i]; -+ register_count = 1; -+ -+ if (e->register_index >= MAX_REG_OUTPUT) -+ continue; -+ -+ register_count = range_map_get_register_count(range_map, e->register_index, e->mask); -+ assert(register_count != UINT8_MAX); -+ register_count += !register_count; -+ -+ if (register_count > 1) -+ { -+ TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count); -+ e->register_count = register_count; -+ } -+ } -+ element_count = new_count; -+ -+ /* Restoring the original order is required for sensible trace output. 
*/ -+ qsort(elements, element_count, sizeof(elements[0]), signature_element_index_compare); -+ -+ s->element_count = element_count; -+ -+ return true; -+} -+ -+static bool sysval_semantic_is_tess_factor(enum vkd3d_shader_sysval_semantic sysval_semantic) -+{ -+ return sysval_semantic >= VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE -+ && sysval_semantic <= VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; -+} -+ -+static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg, -+ unsigned int id_idx, unsigned int register_index) -+{ -+ assert(id_idx < ARRAY_SIZE(reg->idx) - 1); -+ -+ /* For a relative-addressed register index, move the id up a slot to separate it from the address, -+ * because rel_addr can be replaced with a constant offset in some cases. */ -+ if (reg->idx[id_idx].rel_addr) -+ { -+ reg->idx[id_idx + 1].rel_addr = NULL; -+ reg->idx[id_idx + 1].offset = reg->idx[id_idx].offset; -+ reg->idx[id_idx].offset -= register_index; -+ ++id_idx; -+ } -+ /* Otherwise we have no address for the arrayed register, so insert one. This happens e.g. where -+ * tessellation level registers are merged into an array because they're an array in SPIR-V. 
*/ -+ else -+ { -+ ++id_idx; -+ memmove(®->idx[1], ®->idx[0], id_idx * sizeof(reg->idx[0])); -+ reg->idx[0].rel_addr = NULL; -+ reg->idx[0].offset = reg->idx[id_idx].offset - register_index; -+ } -+ -+ return id_idx; -+} -+ -+static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, bool is_io_dcl, -+ struct io_normaliser *normaliser) -+ { -+ unsigned int id_idx, reg_idx, write_mask, element_idx; -+ struct vkd3d_shader_register *reg = &dst_param->reg; -+ struct vkd3d_shader_dst_param **dcl_params; -+ const struct shader_signature *signature; -+ const struct signature_element *e; -+ -+ if ((reg->type == VKD3DSPR_OUTPUT && io_normaliser_is_in_fork_or_join_phase(normaliser)) -+ || reg->type == VKD3DSPR_PATCHCONST) -+ { -+ signature = normaliser->patch_constant_signature; -+ /* Convert patch constant outputs to the patch constant register type to avoid the need -+ * to convert compiler symbols when accessed as inputs in a later stage. */ -+ reg->type = VKD3DSPR_PATCHCONST; -+ dcl_params = normaliser->pc_dcl_params; -+ } -+ else if (reg->type == VKD3DSPR_OUTPUT || dst_param->reg.type == VKD3DSPR_COLOROUT) -+ { -+ signature = normaliser->output_signature; -+ dcl_params = normaliser->output_dcl_params; -+ } -+ else if (dst_param->reg.type == VKD3DSPR_INCONTROLPOINT || dst_param->reg.type == VKD3DSPR_INPUT) -+ { -+ signature = normaliser->input_signature; -+ dcl_params = normaliser->input_dcl_params; -+ } -+ else -+ { -+ return true; -+ } -+ -+ id_idx = reg->idx_count - 1; -+ reg_idx = reg->idx[id_idx].offset; -+ write_mask = dst_param->write_mask; -+ element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); -+ e = &signature->elements[element_idx]; -+ -+ dst_param->write_mask >>= vkd3d_write_mask_get_component_idx(e->mask); -+ if (is_io_dcl) -+ { -+ /* Validated in the TPF reader. 
*/ -+ assert(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); -+ -+ if (dcl_params[element_idx]) -+ { -+ /* Merge split declarations into a single one. */ -+ dcl_params[element_idx]->write_mask |= dst_param->write_mask; -+ /* Turn this into a nop. */ -+ return false; -+ } -+ else -+ { -+ dcl_params[element_idx] = dst_param; -+ } -+ } -+ -+ if (io_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) -+ { -+ if (is_io_dcl) -+ { -+ /* Emit an array size for the control points for consistency with inputs. */ -+ reg->idx[0].offset = normaliser->output_control_point_count; -+ } -+ else -+ { -+ /* The control point id param. */ -+ assert(reg->idx[0].rel_addr); -+ } -+ id_idx = 1; -+ } -+ -+ if ((e->register_count > 1 || sysval_semantic_is_tess_factor(e->sysval_semantic))) -+ { -+ if (is_io_dcl) -+ { -+ /* For control point I/O, idx 0 contains the control point count. -+ * Ensure it is moved up to the next slot. */ -+ reg->idx[id_idx].offset = reg->idx[0].offset; -+ reg->idx[0].offset = e->register_count; -+ ++id_idx; -+ } -+ else -+ { -+ id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); -+ } -+ } -+ -+ /* Replace the register index with the signature element index */ -+ reg->idx[id_idx].offset = element_idx; -+ reg->idx_count = id_idx + 1; -+ -+ return true; -+} -+ -+static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_param, -+ struct io_normaliser *normaliser) -+{ -+ unsigned int i, id_idx, reg_idx, write_mask, element_idx, component_idx; -+ struct vkd3d_shader_register *reg = &src_param->reg; -+ const struct shader_signature *signature; -+ const struct signature_element *e; -+ -+ /* Input/output registers from one phase can be used as inputs in -+ * subsequent phases. Specifically: -+ * -+ * - Control phase inputs are available as "vicp" in fork and join -+ * phases. -+ * - Control phase outputs are available as "vocp" in fork and join -+ * phases. 
-+ * - Fork phase patch constants are available as "vpc" in join -+ * phases. -+ * -+ * We handle "vicp" here by converting INCONTROLPOINT src registers to -+ * type INPUT so they match the control phase declarations. We handle -+ * "vocp" by converting OUTCONTROLPOINT registers to type OUTPUT. -+ * Merging fork and join phases handles "vpc". */ -+ -+ switch (reg->type) -+ { -+ case VKD3DSPR_PATCHCONST: -+ signature = normaliser->patch_constant_signature; -+ break; -+ case VKD3DSPR_INCONTROLPOINT: -+ if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) -+ reg->type = VKD3DSPR_INPUT; -+ /* fall through */ -+ case VKD3DSPR_INPUT: -+ signature = normaliser->input_signature; -+ break; -+ case VKD3DSPR_OUTCONTROLPOINT: -+ if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) -+ reg->type = VKD3DSPR_OUTPUT; -+ /* fall through */ -+ case VKD3DSPR_OUTPUT: -+ signature = normaliser->output_signature; -+ break; -+ default: -+ return; -+ } -+ -+ id_idx = reg->idx_count - 1; -+ reg_idx = reg->idx[id_idx].offset; -+ write_mask = VKD3DSP_WRITEMASK_0 << vkd3d_swizzle_get_component(src_param->swizzle, 0); -+ element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); -+ -+ e = &signature->elements[element_idx]; -+ if ((e->register_count > 1 || sysval_semantic_is_tess_factor(e->sysval_semantic))) -+ id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); -+ reg->idx[id_idx].offset = element_idx; -+ reg->idx_count = id_idx + 1; -+ -+ if ((component_idx = vkd3d_write_mask_get_component_idx(e->mask))) -+ { -+ for (i = 0; i < VKD3D_VEC4_SIZE; ++i) -+ if (vkd3d_swizzle_get_component(src_param->swizzle, i)) -+ src_param->swizzle -= component_idx << VKD3D_SHADER_SWIZZLE_SHIFT(i); -+ } -+} -+ -+static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins, -+ struct io_normaliser *normaliser) -+{ -+ struct vkd3d_shader_register *reg; -+ bool keep = true; -+ unsigned int i; -+ -+ switch (ins->handler_idx) 
-+ { -+ case VKD3DSIH_DCL_INPUT: -+ if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) -+ { -+ reg = &ins->declaration.dst.reg; -+ /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their -+ * equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. */ -+ if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST) -+ vkd3d_shader_instruction_make_nop(ins); -+ else if (reg->type == VKD3DSPR_INCONTROLPOINT) -+ reg->type = VKD3DSPR_INPUT; -+ } -+ /* fall through */ -+ case VKD3DSIH_DCL_INPUT_PS: -+ case VKD3DSIH_DCL_OUTPUT: -+ keep = shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser); -+ break; -+ case VKD3DSIH_DCL_INPUT_SGV: -+ case VKD3DSIH_DCL_INPUT_SIV: -+ case VKD3DSIH_DCL_INPUT_PS_SGV: -+ case VKD3DSIH_DCL_INPUT_PS_SIV: -+ case VKD3DSIH_DCL_OUTPUT_SIV: -+ keep = shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, -+ normaliser); -+ break; -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ normaliser->phase = ins->handler_idx; -+ memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params)); -+ memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params)); -+ memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params)); -+ break; -+ default: -+ if (shader_instruction_is_dcl(ins)) -+ break; -+ for (i = 0; i < ins->dst_count; ++i) -+ shader_dst_param_io_normalise((struct vkd3d_shader_dst_param *)&ins->dst[i], false, normaliser); -+ for (i = 0; i < ins->src_count; ++i) -+ shader_src_param_io_normalise((struct vkd3d_shader_src_param *)&ins->src[i], normaliser); -+ break; -+ } -+ -+ if (!keep) -+ shader_instruction_init(ins, VKD3DSIH_NOP); -+} -+ -+static enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, -+ enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, -+ 
struct shader_signature *output_signature, struct shader_signature *patch_constant_signature) -+{ -+ struct io_normaliser normaliser = {*instructions}; -+ struct vkd3d_shader_instruction *ins; -+ bool has_control_point_phase; -+ unsigned int i, j; -+ -+ normaliser.phase = VKD3DSIH_INVALID; -+ normaliser.shader_type = shader_type; -+ normaliser.input_signature = input_signature; -+ normaliser.output_signature = output_signature; -+ normaliser.patch_constant_signature = patch_constant_signature; -+ -+ for (i = 0, has_control_point_phase = false; i < instructions->count; ++i) -+ { -+ ins = &instructions->elements[i]; -+ -+ switch (ins->handler_idx) -+ { -+ case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: -+ normaliser.output_control_point_count = ins->declaration.count; -+ break; -+ case VKD3DSIH_DCL_INDEX_RANGE: -+ io_normaliser_add_index_range(&normaliser, ins); -+ vkd3d_shader_instruction_make_nop(ins); -+ break; -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ has_control_point_phase = true; -+ /* fall through */ -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ normaliser.phase = ins->handler_idx; -+ break; -+ default: -+ break; -+ } -+ } -+ -+ if (normaliser.shader_type == VKD3D_SHADER_TYPE_HULL && !has_control_point_phase) -+ { -+ /* Inputs and outputs must match for the default phase, so merge ranges must match too. 
*/ -+ for (i = 0; i < MAX_REG_OUTPUT; ++i) -+ { -+ for (j = 0; j < VKD3D_VEC4_SIZE; ++j) -+ { -+ if (!normaliser.input_range_map[i][j] && normaliser.output_range_map[i][j]) -+ normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j]; -+ else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j]) -+ normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j]; -+ else assert(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); -+ } -+ } -+ } -+ -+ if (!shader_signature_merge(input_signature, normaliser.input_range_map, false) -+ || !shader_signature_merge(output_signature, normaliser.output_range_map, false) -+ || !shader_signature_merge(patch_constant_signature, normaliser.pc_range_map, true)) -+ { -+ *instructions = normaliser.instructions; -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ normaliser.phase = VKD3DSIH_INVALID; -+ for (i = 0; i < normaliser.instructions.count; ++i) -+ shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], &normaliser); -+ -+ *instructions = normaliser.instructions; -+ return VKD3D_OK; -+} -+ -+struct flat_constant_def -+{ -+ enum vkd3d_shader_d3dbc_constant_register set; -+ uint32_t index; -+ uint32_t value[4]; -+}; -+ -+struct flat_constants_normaliser -+{ -+ struct vkd3d_shader_parser *parser; -+ struct flat_constant_def *defs; -+ size_t def_count, defs_capacity; -+}; -+ -+static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg, -+ enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index) -+{ -+ static const struct -+ { -+ enum vkd3d_shader_register_type type; -+ enum vkd3d_shader_d3dbc_constant_register set; -+ uint32_t offset; -+ } -+ regs[] = -+ { -+ {VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0}, -+ {VKD3DSPR_CONST2, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048}, -+ {VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096}, -+ {VKD3DSPR_CONST4, 
VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144}, -+ {VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0}, -+ {VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0}, -+ }; -+ -+ unsigned int i; -+ -+ for (i = 0; i < ARRAY_SIZE(regs); ++i) -+ { -+ if (reg->type == regs[i].type) -+ { -+ if (reg->idx[0].rel_addr) -+ { -+ FIXME("Unhandled relative address.\n"); -+ return false; -+ } -+ -+ *set = regs[i].set; -+ *index = regs[i].offset + reg->idx[0].offset; -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_param *param, -+ const struct flat_constants_normaliser *normaliser) -+{ -+ enum vkd3d_shader_d3dbc_constant_register set; -+ uint32_t index; -+ size_t i, j; -+ -+ if (!get_flat_constant_register_type(¶m->reg, &set, &index)) -+ return; -+ -+ for (i = 0; i < normaliser->def_count; ++i) -+ { -+ if (normaliser->defs[i].set == set && normaliser->defs[i].index == index) -+ { -+ param->reg.type = VKD3DSPR_IMMCONST; -+ param->reg.idx_count = 0; -+ param->reg.immconst_type = VKD3D_IMMCONST_VEC4; -+ for (j = 0; j < 4; ++j) -+ param->reg.u.immconst_uint[j] = normaliser->defs[i].value[j]; -+ return; -+ } -+ } -+ -+ param->reg.type = VKD3DSPR_CONSTBUFFER; -+ param->reg.idx[0].offset = set; /* register ID */ -+ param->reg.idx[1].offset = set; /* register index */ -+ param->reg.idx[2].offset = index; /* buffer index */ -+ param->reg.idx_count = 3; -+} -+ -+static enum vkd3d_result instruction_array_normalise_flat_constants(struct vkd3d_shader_parser *parser) -+{ -+ struct flat_constants_normaliser normaliser = {.parser = parser}; -+ unsigned int i, j; -+ -+ for (i = 0; i < parser->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &parser->instructions.elements[i]; -+ -+ if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) -+ { -+ struct flat_constant_def *def; -+ -+ if (!vkd3d_array_reserve((void 
**)&normaliser.defs, &normaliser.defs_capacity, -+ normaliser.def_count + 1, sizeof(*normaliser.defs))) -+ { -+ vkd3d_free(normaliser.defs); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ def = &normaliser.defs[normaliser.def_count++]; -+ -+ get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index); -+ for (j = 0; j < 4; ++j) -+ def->value[j] = ins->src[0].reg.u.immconst_uint[j]; -+ -+ vkd3d_shader_instruction_make_nop(ins); -+ } -+ else -+ { -+ for (j = 0; j < ins->src_count; ++j) -+ shader_register_normalise_flat_constants((struct vkd3d_shader_src_param *)&ins->src[j], &normaliser); -+ } -+ } -+ -+ vkd3d_free(normaliser.defs); -+ return VKD3D_OK; -+} -+ -+enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_compile_info *compile_info) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &parser->instructions; -+ enum vkd3d_result result = VKD3D_OK; -+ -+ if (parser->shader_desc.is_dxil) -+ return result; -+ -+ if (parser->shader_version.type != VKD3D_SHADER_TYPE_PIXEL -+ && (result = remap_output_signature(parser, compile_info)) < 0) -+ return result; -+ -+ if (parser->shader_version.type == VKD3D_SHADER_TYPE_HULL -+ && (result = instruction_array_flatten_hull_shader_phases(instructions)) >= 0) -+ { -+ result = instruction_array_normalise_hull_shader_control_point_io(instructions, -+ &parser->shader_desc.input_signature); -+ } -+ if (result >= 0) -+ result = instruction_array_normalise_io_registers(instructions, parser->shader_version.type, -+ &parser->shader_desc.input_signature, &parser->shader_desc.output_signature, -+ &parser->shader_desc.patch_constant_signature); -+ -+ if (result >= 0) -+ result = instruction_array_normalise_flat_constants(parser); -+ -+ if (result >= 0 && TRACE_ON()) -+ vkd3d_shader_trace(instructions, &parser->shader_version); -+ -+ return result; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h 
b/libs/vkd3d/libs/vkd3d-shader/preproc.h -index 4860cf5f90e..e1cb75e177c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.h -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h -@@ -22,7 +22,7 @@ - #define __VKD3D_SHADER_PREPROC_H - - #include "vkd3d_shader_private.h" --#include "wine/rbtree.h" -+#include "rbtree.h" - - struct preproc_if_state - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index bb5a6b61de1..6fb61eff6c3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.l -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l -@@ -30,6 +30,13 @@ - - #define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner) - -+static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) -+{ -+ if (!ctx->expansion_count) -+ return NULL; -+ return ctx->expansion_stack[ctx->expansion_count - 1].macro; -+} -+ - static void update_location(struct preproc_ctx *ctx); - - #define YY_USER_ACTION update_location(yyget_extra(yyscanner)); -@@ -41,6 +48,7 @@ static void update_location(struct preproc_ctx *ctx); - %option bison-locations - %option extra-type="struct preproc_ctx *" - %option never-interactive -+%option nodefault - %option noinput - %option nounput - %option noyy_top_state -@@ -75,6 +83,7 @@ INT_SUFFIX [uUlL]{0,2} - "*/" {yy_pop_state(yyscanner);} - <> {yy_pop_state(yyscanner);} - . {} -+\n {} - - (\\{NEWLINE}|[^\n])* {return T_STRING;} - -@@ -123,7 +132,20 @@ INT_SUFFIX [uUlL]{0,2} - const char *p; - - if (!ctx->last_was_newline) -- return T_HASHSTRING; -+ { -+ struct preproc_macro *macro; -+ -+ /* Stringification is only done for function-like macro bodies. -+ * Anywhere else, we need to parse it as two separate tokens. -+ * We could use a state for this, but yyless() is easier and cheap. 
-+ */ -+ -+ if ((macro = preproc_get_top_macro(ctx)) && macro->arg_count) -+ return T_HASHSTRING; -+ -+ yyless(1); -+ return T_TEXT; -+ } - - for (p = yytext + 1; strchr(" \t", *p); ++p) - ; -@@ -176,9 +198,9 @@ INT_SUFFIX [uUlL]{0,2} - return T_NEWLINE; - } - --{WS}+ {} -+{WS}+ {} - [-()\[\]{},+!*/<>&|^?:] {return yytext[0];} --. {return T_TEXT;} -+. {return T_TEXT;} - - %% - -@@ -217,13 +239,6 @@ static bool preproc_is_writing(struct preproc_ctx *ctx) - return file->if_stack[file->if_count - 1].current_true; - } - --static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) --{ -- if (!ctx->expansion_count) -- return NULL; -- return ctx->expansion_stack[ctx->expansion_count - 1].macro; --} -- - /* Concatenation is not done for object-like macros, but is done for both - * function-like macro bodies and their arguments. */ - static bool should_concat(struct preproc_ctx *ctx) -@@ -332,6 +347,43 @@ static bool preproc_push_expansion(struct preproc_ctx *ctx, - return true; - } - -+static void preproc_stringify(struct preproc_ctx *ctx, struct vkd3d_string_buffer *buffer, const char *text) -+{ -+ const struct preproc_text *expansion; -+ const char *p = text + 1; -+ unsigned int i; -+ -+ while (*p == ' ' || *p == '\t') -+ ++p; -+ -+ vkd3d_string_buffer_printf(buffer, "\""); -+ if ((expansion = find_arg_expansion(ctx, p))) -+ { -+ size_t len = expansion->text.content_size; -+ size_t start = 0; -+ -+ while (len && strchr(" \t\r\n", expansion->text.buffer[len - 1])) -+ --len; -+ -+ while (start < len && strchr(" \t\r\n", expansion->text.buffer[start])) -+ ++start; -+ -+ for (i = start; i < len; ++i) -+ { -+ char c = expansion->text.buffer[i]; -+ -+ if (c == '\\' || c == '"') -+ vkd3d_string_buffer_printf(buffer, "\\"); -+ vkd3d_string_buffer_printf(buffer, "%c", c); -+ } -+ } -+ else -+ { -+ vkd3d_string_buffer_printf(buffer, "%s", p); -+ } -+ vkd3d_string_buffer_printf(buffer, "\""); -+} -+ - int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - { - 
struct preproc_ctx *ctx = yyget_extra(scanner); -@@ -439,9 +491,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - switch (func_state->state) - { - case STATE_NONE: -- { -- struct preproc_macro *macro; -- - if (token == T_CONCAT && should_concat(ctx)) - { - while (ctx->buffer.content_size -@@ -450,37 +499,17 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - break; - } - -- /* Stringification, however, is only done for function-like -- * macro bodies. */ -- if (token == T_HASHSTRING && (macro = preproc_get_top_macro(ctx)) && macro->arg_count) -+ if (token == T_HASHSTRING) - { -- const struct preproc_text *expansion; -- const char *p = text + 1; -- unsigned int i; -+ struct vkd3d_string_buffer buffer; - - if (ctx->current_directive) - return return_token(token, lval, text); - -- while (*p == ' ' || *p == '\t') -- ++p; -- -- vkd3d_string_buffer_printf(&ctx->buffer, "\""); -- if ((expansion = find_arg_expansion(ctx, p))) -- { -- for (i = 0; i < expansion->text.content_size; ++i) -- { -- char c = expansion->text.buffer[i]; -- -- if (c == '\\' || c == '"') -- vkd3d_string_buffer_printf(&ctx->buffer, "\\"); -- vkd3d_string_buffer_printf(&ctx->buffer, "%c", c); -- } -- } -- else -- { -- vkd3d_string_buffer_printf(&ctx->buffer, "%s", p); -- } -- vkd3d_string_buffer_printf(&ctx->buffer, "\""); -+ vkd3d_string_buffer_init(&buffer); -+ preproc_stringify(ctx, &buffer, text); -+ vkd3d_string_buffer_printf(&ctx->buffer, "%s", buffer.buffer); -+ vkd3d_string_buffer_cleanup(&buffer); - break; - } - -@@ -584,7 +613,6 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - else - vkd3d_string_buffer_printf(&ctx->buffer, "%s ", text); - break; -- } - - case STATE_IDENTIFIER: - if (token == '(') -@@ -626,6 +654,41 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - - switch (token) - { -+ /* Most text gets left alone (e.g. if it contains macros, -+ * the macros should be evaluated later). 
-+ * Arguments are a special case, and are replaced with -+ * their values immediately. */ -+ case T_IDENTIFIER: -+ case T_IDENTIFIER_PAREN: -+ { -+ const struct preproc_text *expansion; -+ -+ if ((expansion = find_arg_expansion(ctx, text))) -+ { -+ preproc_push_expansion(ctx, expansion, NULL); -+ continue; -+ } -+ -+ if (current_arg) -+ preproc_text_add(current_arg, text); -+ break; -+ } -+ -+ /* Stringification is another special case. Unsurprisingly, -+ * we need to stringify if this is an argument. More -+ * surprisingly, we need to stringify even if it's not. */ -+ case T_HASHSTRING: -+ { -+ struct vkd3d_string_buffer buffer; -+ -+ vkd3d_string_buffer_init(&buffer); -+ preproc_stringify(ctx, &buffer, text); -+ if (current_arg) -+ preproc_text_add(current_arg, buffer.buffer); -+ vkd3d_string_buffer_cleanup(&buffer); -+ break; -+ } -+ - case T_NEWLINE: - if (current_arg) - preproc_text_add(current_arg, " "); -@@ -684,6 +747,9 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - if (current_arg) - preproc_text_add(current_arg, text); - } -+ -+ if (current_arg) -+ preproc_text_add(current_arg, " "); - break; - } - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 53e13735937..8285b56a17c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -18,7 +18,7 @@ - */ - - #include "vkd3d_shader_private.h" --#include "wine/rbtree.h" -+#include "rbtree.h" - - #include - #include -@@ -168,7 +168,7 @@ static void vkd3d_spirv_validate(const struct vkd3d_shader_code *spirv, - - #endif /* HAVE_SPIRV_TOOLS */ - --static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, -+enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, - unsigned int index) - { - switch (sysval) -@@ -199,14 +199,19 @@ static enum vkd3d_shader_input_sysval_semantic 
vkd3d_siv_from_sysval_indexed(enu - } - } - --static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_shader_sysval_semantic sysval) -+static inline bool register_is_undef(const struct vkd3d_shader_register *reg) - { -- return vkd3d_siv_from_sysval_indexed(sysval, 0); -+ return reg->type == VKD3DSPR_UNDEF; -+} -+ -+static inline bool register_is_constant_or_undef(const struct vkd3d_shader_register *reg) -+{ -+ return register_is_constant(reg) || register_is_undef(reg); - } - - #define VKD3D_SPIRV_VERSION 0x00010000 - #define VKD3D_SPIRV_GENERATOR_ID 18 --#define VKD3D_SPIRV_GENERATOR_VERSION 7 -+#define VKD3D_SPIRV_GENERATOR_VERSION 9 - #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) - - struct vkd3d_spirv_stream -@@ -1751,6 +1756,38 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, - } - } - -+static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder *builder, -+ enum vkd3d_data_type data_type, unsigned int component_count) -+{ -+ uint32_t scalar_id; -+ -+ if (component_count == 1) -+ { -+ switch (data_type) -+ { -+ case VKD3D_DATA_FLOAT: -+ case VKD3D_DATA_SNORM: -+ case VKD3D_DATA_UNORM: -+ return vkd3d_spirv_get_op_type_float(builder, 32); -+ break; -+ case VKD3D_DATA_INT: -+ case VKD3D_DATA_UINT: -+ return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); -+ break; -+ case VKD3D_DATA_DOUBLE: -+ return vkd3d_spirv_get_op_type_float(builder, 64); -+ default: -+ FIXME("Unhandled data type %#x.\n", data_type); -+ return 0; -+ } -+ } -+ else -+ { -+ scalar_id = vkd3d_spirv_get_type_id_for_data_type(builder, data_type, 1); -+ return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); -+ } -+} -+ - static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point) - { - vkd3d_spirv_stream_init(&builder->debug_stream); -@@ -1967,11 +2004,9 @@ struct 
vkd3d_symbol_register_data - uint32_t member_idx; - enum vkd3d_shader_component_type component_type; - unsigned int write_mask; -- uint32_t dcl_mask; - unsigned int structure_stride; - unsigned int binding_base_idx; - bool is_aggregate; /* An aggregate, i.e. a structure or an array. */ -- bool is_dynamically_indexed; /* If member_idx is a variable ID instead of a constant. */ - }; - - struct vkd3d_symbol_resource_data -@@ -2064,10 +2099,14 @@ static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, - symbol->type = VKD3D_SYMBOL_REGISTER; - memset(&symbol->key, 0, sizeof(symbol->key)); - symbol->key.reg.type = reg->type; -- if (vkd3d_shader_register_is_input(reg) && reg->idx[1].offset != ~0u) -- symbol->key.reg.idx = reg->idx[1].offset; -+ if (vkd3d_shader_register_is_input(reg) || vkd3d_shader_register_is_output(reg) -+ || vkd3d_shader_register_is_patch_constant(reg)) -+ { -+ symbol->key.reg.idx = reg->idx_count ? reg->idx[reg->idx_count - 1].offset : ~0u; -+ assert(!reg->idx_count || symbol->key.reg.idx != ~0u); -+ } - else if (reg->type != VKD3DSPR_IMMCONSTBUFFER) -- symbol->key.reg.idx = reg->idx[0].offset; -+ symbol->key.reg.idx = reg->idx_count ? 
reg->idx[0].offset : ~0u; - } - - static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, -@@ -2080,11 +2119,9 @@ static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, - symbol->info.reg.member_idx = 0; - symbol->info.reg.component_type = component_type; - symbol->info.reg.write_mask = write_mask; -- symbol->info.reg.dcl_mask = 0; - symbol->info.reg.structure_stride = 0; - symbol->info.reg.binding_base_idx = 0; - symbol->info.reg.is_aggregate = false; -- symbol->info.reg.is_dynamically_indexed = false; - } - - static void vkd3d_symbol_make_resource(struct vkd3d_symbol *symbol, -@@ -2197,11 +2234,7 @@ struct vkd3d_push_constant_buffer_binding - - struct vkd3d_shader_phase - { -- enum vkd3d_shader_opcode type; -- unsigned int idx; -- unsigned int instance_count; - uint32_t function_id; -- uint32_t instance_id; - size_t function_location; - }; - -@@ -2253,10 +2286,11 @@ struct spirv_compiler - struct vkd3d_push_constant_buffer_binding *push_constants; - const struct vkd3d_shader_spirv_target_info *spirv_target_info; - -+ bool main_block_open; - bool after_declarations_section; -- const struct vkd3d_shader_signature *input_signature; -- const struct vkd3d_shader_signature *output_signature; -- const struct vkd3d_shader_signature *patch_constant_signature; -+ struct shader_signature input_signature; -+ struct shader_signature output_signature; -+ struct shader_signature patch_constant_signature; - const struct vkd3d_shader_transform_feedback_info *xfb_info; - struct vkd3d_shader_output_info - { -@@ -2271,14 +2305,15 @@ struct spirv_compiler - - uint32_t binding_idx; - -- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; - unsigned int input_control_point_count; - unsigned int output_control_point_count; - bool use_vocp; - -- unsigned int shader_phase_count; -- struct vkd3d_shader_phase *shader_phases; -- size_t shader_phases_size; -+ enum 
vkd3d_shader_opcode phase; -+ bool emit_default_control_point_phase; -+ struct vkd3d_shader_phase control_point_phase; -+ struct vkd3d_shader_phase patch_constant_phase; - - uint32_t current_spec_constant_id; - unsigned int spec_constant_count; -@@ -2290,9 +2325,19 @@ struct spirv_compiler - struct vkd3d_string_buffer_cache string_buffers; - }; - --static bool is_control_point_phase(const struct vkd3d_shader_phase *phase) -+static bool is_in_default_phase(const struct spirv_compiler *compiler) -+{ -+ return compiler->phase == VKD3DSIH_INVALID; -+} -+ -+static bool is_in_control_point_phase(const struct spirv_compiler *compiler) -+{ -+ return compiler->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; -+} -+ -+static bool is_in_fork_or_join_phase(const struct spirv_compiler *compiler) - { -- return phase && phase->type == VKD3DSIH_HS_CONTROL_POINT_PHASE; -+ return compiler->phase == VKD3DSIH_HS_FORK_PHASE || compiler->phase == VKD3DSIH_HS_JOIN_PHASE; - } - - static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler); -@@ -2304,13 +2349,37 @@ static const char *spirv_compiler_get_entry_point_name(const struct spirv_compil - return info && info->entry_point ? 
info->entry_point : "main"; - } - --struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, -- const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, -- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, -+static void spirv_compiler_destroy(struct spirv_compiler *compiler) -+{ -+ vkd3d_free(compiler->control_flow_info); -+ -+ vkd3d_free(compiler->output_info); -+ -+ vkd3d_free(compiler->push_constants); -+ vkd3d_free(compiler->descriptor_offset_ids); -+ -+ vkd3d_spirv_builder_free(&compiler->spirv_builder); -+ -+ rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL); -+ -+ vkd3d_free(compiler->spec_constants); -+ -+ vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); -+ -+ shader_signature_cleanup(&compiler->input_signature); -+ shader_signature_cleanup(&compiler->output_signature); -+ shader_signature_cleanup(&compiler->patch_constant_signature); -+ -+ vkd3d_free(compiler); -+} -+ -+static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, -+ struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, -+ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) - { -- const struct vkd3d_shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; -- const struct vkd3d_shader_signature *output_signature = &shader_desc->output_signature; -+ const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; -+ const struct shader_signature *output_signature = &shader_desc->output_signature; - const struct vkd3d_shader_interface_info *shader_interface; - const struct vkd3d_shader_descriptor_offset_info *offset_info; - const struct vkd3d_shader_spirv_target_info *target_info; -@@ -2402,10 +2471,6 @@ struct 
spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * - - compiler->shader_type = shader_version->type; - -- compiler->input_signature = &shader_desc->input_signature; -- compiler->output_signature = &shader_desc->output_signature; -- compiler->patch_constant_signature = &shader_desc->patch_constant_signature; -- - if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) - { - compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); -@@ -2437,6 +2502,8 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * - - compiler->scan_descriptor_info = scan_descriptor_info; - -+ compiler->phase = VKD3DSIH_INVALID; -+ - vkd3d_string_buffer_cache_init(&compiler->string_buffers); - - spirv_compiler_emit_initial_declarations(compiler); -@@ -2504,13 +2571,13 @@ static bool spirv_compiler_check_shader_visibility(const struct spirv_compiler * - } - - static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_constant_buffer( -- const struct spirv_compiler *compiler, const struct vkd3d_shader_constant_buffer *cb) -+ const struct spirv_compiler *compiler, const struct vkd3d_shader_register_range *range) - { -- unsigned int register_space = cb->range.space; -- unsigned int reg_idx = cb->range.first; -+ unsigned int register_space = range->space; -+ unsigned int reg_idx = range->first; - unsigned int i; - -- if (cb->range.first != cb->range.last) -+ if (range->first != range->last) - return NULL; - - for (i = 0; i < compiler->shader_interface.push_constant_buffer_count; ++i) -@@ -2527,8 +2594,8 @@ static struct vkd3d_push_constant_buffer_binding *spirv_compiler_find_push_const - return NULL; - } - --static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *compiler, -- const struct vkd3d_shader_resource *resource, const struct vkd3d_shader_sampler *sampler) -+static bool spirv_compiler_has_combined_sampler_for_resource(const struct spirv_compiler 
*compiler, -+ const struct vkd3d_shader_register_range *range) - { - const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; - const struct vkd3d_shader_combined_resource_sampler *combined_sampler; -@@ -2537,10 +2604,35 @@ static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *com - if (!shader_interface->combined_sampler_count) - return false; - -- if (resource && (resource->reg.reg.type == VKD3DSPR_UAV || resource->range.last != resource->range.first)) -+ if (range->last != range->first) -+ return false; -+ -+ for (i = 0; i < shader_interface->combined_sampler_count; ++i) -+ { -+ combined_sampler = &shader_interface->combined_samplers[i]; -+ -+ if (!spirv_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) -+ continue; -+ -+ if ((combined_sampler->resource_space == range->space -+ && combined_sampler->resource_index == range->first)) -+ return true; -+ } -+ -+ return false; -+} -+ -+static bool spirv_compiler_has_combined_sampler_for_sampler(const struct spirv_compiler *compiler, -+ const struct vkd3d_shader_register_range *range) -+{ -+ const struct vkd3d_shader_interface_info *shader_interface = &compiler->shader_interface; -+ const struct vkd3d_shader_combined_resource_sampler *combined_sampler; -+ unsigned int i; -+ -+ if (!shader_interface->combined_sampler_count) - return false; - -- if (sampler && sampler->range.first != sampler->range.last) -+ if (range->last != range->first) - return false; - - for (i = 0; i < shader_interface->combined_sampler_count; ++i) -@@ -2550,10 +2642,8 @@ static bool spirv_compiler_has_combined_sampler(const struct spirv_compiler *com - if (!spirv_compiler_check_shader_visibility(compiler, combined_sampler->shader_visibility)) - continue; - -- if ((!resource || (combined_sampler->resource_space == resource->range.space -- && combined_sampler->resource_index == resource->range.first)) -- && (!sampler || (combined_sampler->sampler_space == 
sampler->range.space -- && combined_sampler->sampler_index == sampler->range.first))) -+ if (combined_sampler->sampler_space == range->space -+ && combined_sampler->sampler_index == range->first) - return true; - } - -@@ -2571,6 +2661,16 @@ static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_error(struct spirv_compiler * - compiler->failed = true; - } - -+static void VKD3D_PRINTF_FUNC(3, 4) spirv_compiler_warning(struct spirv_compiler *compiler, -+ enum vkd3d_shader_error error, const char *format, ...) -+{ -+ va_list args; -+ -+ va_start(args, format); -+ vkd3d_shader_vwarning(compiler->message_context, &compiler->location, error, format, args); -+ va_end(args); -+} -+ - static struct vkd3d_string_buffer *vkd3d_shader_register_range_string(struct spirv_compiler *compiler, - const struct vkd3d_shader_register_range *range) - { -@@ -2857,7 +2957,7 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s - { - unsigned int idx; - -- idx = reg->idx[1].offset != ~0u ? reg->idx[1].offset : reg->idx[0].offset; -+ idx = reg->idx_count ? 
reg->idx[reg->idx_count - 1].offset : 0; - switch (reg->type) - { - case VKD3DSPR_RESOURCE: -@@ -2887,12 +2987,6 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s - case VKD3DSPR_DEPTHOUTLE: - snprintf(buffer, buffer_size, "oDepth"); - break; -- case VKD3DSPR_FORKINSTID: -- snprintf(buffer, buffer_size, "vForkInstanceId"); -- break; -- case VKD3DSPR_JOININSTID: -- snprintf(buffer, buffer_size, "vJoinInstanceId"); -- break; - case VKD3DSPR_GSINSTID: - snprintf(buffer, buffer_size, "vGSInstanceID"); - break; -@@ -2965,18 +3059,26 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, - - static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler, - struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, -- enum vkd3d_shader_component_type component_type, unsigned int component_count, unsigned int array_length) -+ enum vkd3d_shader_component_type component_type, unsigned int component_count, -+ const unsigned int *array_lengths, unsigned int length_count) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id, length_id, ptr_type_id; -+ unsigned int i; - -- if (!array_length) -+ if (!length_count) - return spirv_compiler_emit_variable(compiler, - stream, storage_class, component_type, component_count); - - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); -- length_id = spirv_compiler_get_constant_uint(compiler, array_length); -- type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); -+ for (i = 0; i < length_count; ++i) -+ { -+ if (!array_lengths[i]) -+ continue; -+ length_id = spirv_compiler_get_constant_uint(compiler, array_lengths[i]); -+ type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); -+ } -+ - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); - return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); - } -@@ -3169,7 
+3271,6 @@ struct vkd3d_shader_register_info - unsigned int structure_stride; - unsigned int binding_base_idx; - bool is_aggregate; -- bool is_dynamically_indexed; - }; - - static bool spirv_compiler_get_register_info(const struct spirv_compiler *compiler, -@@ -3178,13 +3279,13 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil - struct vkd3d_symbol reg_symbol, *symbol; - struct rb_entry *entry; - -- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); -+ assert(!register_is_constant_or_undef(reg)); - - if (reg->type == VKD3DSPR_TEMP) - { - assert(reg->idx[0].offset < compiler->temp_count); - register_info->id = compiler->temp_id + reg->idx[0].offset; -- register_info->storage_class = SpvStorageClassFunction; -+ register_info->storage_class = SpvStorageClassPrivate; - register_info->descriptor_array = NULL; - register_info->member_idx = 0; - register_info->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -@@ -3192,7 +3293,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil - register_info->structure_stride = 0; - register_info->binding_base_idx = 0; - register_info->is_aggregate = false; -- register_info->is_dynamically_indexed = false; - return true; - } - -@@ -3214,7 +3314,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil - register_info->structure_stride = symbol->info.reg.structure_stride; - register_info->binding_base_idx = symbol->info.reg.binding_base_idx; - register_info->is_aggregate = symbol->info.reg.is_aggregate; -- register_info->is_dynamically_indexed = symbol->info.reg.is_dynamically_indexed; - - return true; - } -@@ -3344,41 +3443,22 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp - } - else if (register_info->is_aggregate) - { -- if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_INCONTROLPOINT) -- { -- /* Indices for these are swapped compared to the generated SPIR-V. 
*/ -- if (reg->idx[1].offset != ~0u) -- indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[1]); -- if (reg->idx[0].offset != ~0u) -- indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]); -- } -- else -- { -- struct vkd3d_shader_register_index reg_idx = reg->idx[0]; -- -- if (reg->idx[1].rel_addr) -- FIXME("Relative addressing not implemented.\n"); -- -- if (register_info->is_dynamically_indexed) -- { -- indexes[index_count++] = vkd3d_spirv_build_op_load(builder, -- vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, 1), -- register_info->member_idx, SpvMemoryAccessMaskNone); -- } -- else -- { -- reg_idx.offset = register_info->member_idx; -- indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg_idx); -- } -- } -+ /* Indices for these are swapped compared to the generated SPIR-V. */ -+ if (reg->idx_count > 2) -+ indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[1]); -+ if (reg->idx_count > 1) -+ indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]); -+ if (!index_count) -+ /* A register sysval which is an array in SPIR-V, e.g. SAMPLEMASK. */ -+ indexes[index_count++] = spirv_compiler_get_constant_uint(compiler, 0); - } - else - { -- if (reg->idx[1].rel_addr || (reg->idx[1].offset == ~0u && reg->idx[0].rel_addr)) -+ if (reg->idx_count && reg->idx[reg->idx_count - 1].rel_addr) - FIXME("Relative addressing not implemented.\n"); - - /* Handle arrayed registers, e.g. v[3][0].
*/ -- if (reg->idx[1].offset != ~0u && !register_is_descriptor(reg)) -+ if (reg->idx_count > 1 && !register_is_descriptor(reg)) - indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]); - } - -@@ -3541,6 +3621,19 @@ static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi - vkd3d_component_type_from_data_type(reg->data_type), component_count, values); - } - -+static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_register *reg, DWORD write_mask) -+{ -+ unsigned int component_count = vkd3d_write_mask_component_count(write_mask); -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ uint32_t type_id; -+ -+ assert(reg->type == VKD3DSPR_UNDEF); -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); -+ return vkd3d_spirv_build_op_undef(builder, &builder->global_stream, type_id); -+} -+ - static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, DWORD swizzle, DWORD write_mask, - const struct vkd3d_shader_register_info *reg_info) -@@ -3551,7 +3644,7 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, - enum vkd3d_shader_component_type component_type; - unsigned int skipped_component_mask; - -- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); -+ assert(!register_is_constant_or_undef(reg)); - assert(vkd3d_write_mask_component_count(write_mask) == 1); - - component_idx = vkd3d_write_mask_get_component_idx(write_mask); -@@ -3603,6 +3696,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - return spirv_compiler_emit_load_constant(compiler, reg, swizzle, write_mask); - else if (reg->type == VKD3DSPR_IMMCONST64) - return spirv_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); -+ else if (reg->type == VKD3DSPR_UNDEF) -+ return
spirv_compiler_emit_load_undef(compiler, reg, write_mask); - - component_count = vkd3d_write_mask_component_count(write_mask); - component_type = vkd3d_component_type_from_data_type(reg->data_type); -@@ -3815,7 +3910,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, - unsigned int src_write_mask = write_mask; - uint32_t type_id; - -- assert(reg->type != VKD3DSPR_IMMCONST && reg->type != VKD3DSPR_IMMCONST64); -+ assert(!register_is_constant_or_undef(reg)); - - if (!spirv_compiler_get_register_info(compiler, reg, &reg_info)) - return; -@@ -3986,6 +4081,11 @@ static void spirv_compiler_emit_interpolation_decorations(struct spirv_compiler - vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); - vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); - break; -+ case VKD3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE: -+ vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationNoPerspective, NULL, 0); -+ vkd3d_spirv_enable_capability(builder, SpvCapabilitySampleRateShading); -+ vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationSample, NULL, 0); -+ break; - default: - FIXME("Unhandled interpolation mode %#x.\n", mode); - break; -@@ -4249,35 +4349,12 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp - if ((builtin = get_spirv_builtin_for_register(reg_type))) - return builtin; - -- if (sysval != VKD3D_SIV_NONE || (reg_type != VKD3DSPR_OUTPUT && reg_type != VKD3DSPR_COLOROUT)) -+ if (sysval != VKD3D_SIV_NONE || (reg_type != VKD3DSPR_OUTPUT && reg_type != VKD3DSPR_COLOROUT -+ && reg_type != VKD3DSPR_PATCHCONST)) - FIXME("Unhandled builtin (register type %#x, sysval %#x).\n", reg_type, sysval); - return NULL; - } - --static const struct vkd3d_shader_signature_element *vkd3d_find_signature_element_for_reg( -- const struct vkd3d_shader_signature *signature, unsigned int *signature_element_index, -- unsigned int reg_idx, DWORD write_mask) --{ -- unsigned int signature_idx; -- -- for
(signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) -- { -- if (signature->elements[signature_idx].register_index == reg_idx -- && (signature->elements[signature_idx].mask & write_mask) == write_mask) -- { -- if (signature_element_index) -- *signature_element_index = signature_idx; -- return &signature->elements[signature_idx]; -- } -- } -- -- FIXME("Could not find shader signature element (register %u, write mask %#x).\n", -- reg_idx, write_mask); -- if (signature_element_index) -- *signature_element_index = ~0u; -- return NULL; --} -- - static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler) - { - struct vkd3d_shader_register r; -@@ -4288,6 +4365,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler - r.type = VKD3DSPR_OUTPOINTID; - r.idx[0].offset = ~0u; - r.idx[1].offset = ~0u; -+ r.idx_count = 0; - return spirv_compiler_get_register_id(compiler, &r); - } - -@@ -4302,7 +4380,7 @@ static uint32_t spirv_compiler_emit_load_invocation_id(struct spirv_compiler *co - } - - static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compiler, -- uint32_t id, const struct vkd3d_shader_phase *phase, const char *suffix) -+ uint32_t id, const char *suffix) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const char *name; -@@ -4310,7 +4388,7 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile - if (!suffix) - suffix = ""; - -- switch (phase->type) -+ switch (compiler->phase) - { - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - name = "control"; -@@ -4322,62 +4400,23 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile - name = "join"; - break; - default: -- ERR("Invalid phase type %#x.\n", phase->type); -+ ERR("Invalid phase type %#x.\n", compiler->phase); - return; - } -- vkd3d_spirv_build_op_name(builder, id, "%s%u%s", name, phase->idx, suffix); --} -- --static void 
spirv_compiler_begin_shader_phase(struct spirv_compiler *compiler, -- struct vkd3d_shader_phase *phase) --{ -- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- uint32_t void_id, function_type_id; -- unsigned int param_count; -- uint32_t param_type_id; -- -- if (phase->instance_count) -- { -- param_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); -- param_count = 1; -- } -- else -- { -- param_count = 0; -- } -- -- phase->function_id = vkd3d_spirv_alloc_id(builder); -- -- void_id = vkd3d_spirv_get_op_type_void(builder); -- function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, &param_type_id, param_count); -- vkd3d_spirv_build_op_function(builder, void_id, phase->function_id, -- SpvFunctionControlMaskNone, function_type_id); -- -- if (phase->instance_count) -- phase->instance_id = vkd3d_spirv_build_op_function_parameter(builder, param_type_id); -- -- vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); -- phase->function_location = vkd3d_spirv_stream_current_location(&builder->function_stream); -- -- spirv_compiler_emit_shader_phase_name(compiler, phase->function_id, phase, NULL); -+ vkd3d_spirv_build_op_name(builder, id, "%s%s", name, suffix); - } - - static const struct vkd3d_shader_phase *spirv_compiler_get_current_shader_phase( - struct spirv_compiler *compiler) - { -- struct vkd3d_shader_phase *phase; -- -- if (!compiler->shader_phase_count) -+ if (is_in_default_phase(compiler)) - return NULL; - -- phase = &compiler->shader_phases[compiler->shader_phase_count - 1]; -- if (!phase->function_id) -- spirv_compiler_begin_shader_phase(compiler, phase); -- return phase; -+ return is_in_control_point_phase(compiler) ?
&compiler->control_point_phase : &compiler->patch_constant_phase; - } - - static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler, -- uint32_t id, unsigned int component_count, const struct vkd3d_shader_signature_element *signature_element) -+ uint32_t id, unsigned int component_count, const struct signature_element *signature_element) - { - const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; - const struct vkd3d_shader_transform_feedback_element *xfb_element; -@@ -4436,17 +4475,21 @@ static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler, - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationOffset, offset); - } - --static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *compiler, -- const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, unsigned int array_size) -+static uint32_t spirv_compiler_emit_builtin_variable_v(struct spirv_compiler *compiler, -+ const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, const unsigned int *array_sizes, -+ unsigned int size_count) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ unsigned int sizes[2]; - uint32_t id; - -- array_size = max(array_size, builtin->spirv_array_size); -+ assert(size_count <= ARRAY_SIZE(sizes)); -+ memcpy(sizes, array_sizes, size_count * sizeof(sizes[0])); -+ array_sizes = sizes; -+ sizes[0] = max(sizes[0], builtin->spirv_array_size); - -- id = spirv_compiler_emit_array_variable(compiler, -- &builder->global_stream, storage_class, -- builtin->component_type, builtin->component_count, array_size); -+ id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, storage_class, -+ builtin->component_type, builtin->component_count, array_sizes, size_count); - vkd3d_spirv_add_iface_variable(builder, id); - spirv_compiler_decorate_builtin(compiler, id, builtin->spirv_builtin); - -@@ -4458,54 +4501,45 @@ static uint32_t 
spirv_compiler_emit_builtin_variable(struct spirv_compiler *comp - return id; - } - --static bool needs_private_io_variable(const struct vkd3d_shader_signature *signature, -- unsigned int reg_idx, const struct vkd3d_spirv_builtin *builtin, -- unsigned int *component_count, unsigned int *out_write_mask) -+static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *compiler, -+ const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, unsigned int array_size) - { -- unsigned int write_mask = 0; -- bool have_sysval = false; -- unsigned int i, count; -- -- /* Always use private variables for arrayed builtins. These are generally -- * scalars on the D3D side, so would need extra array indices when -- * accessing them. It may be feasible to insert those indices at the point -- * where the builtins are used, but it's not clear it's worth the effort. */ -- if (builtin && (builtin->spirv_array_size || builtin->fixup_pfn)) -- return true; -- -- if (*component_count == VKD3D_VEC4_SIZE) -- return false; -- -- for (i = 0, count = 0; i < signature->element_count; ++i) -- { -- const struct vkd3d_shader_signature_element *current = &signature->elements[i]; -+ return spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, &array_size, 1); -+} - -- if (current->register_index != reg_idx) -- continue; -+static bool needs_private_io_variable(const struct vkd3d_spirv_builtin *builtin) -+{ -+ return builtin && builtin->fixup_pfn; -+} - -- write_mask |= current->mask; -- ++count; -+static unsigned int shader_signature_next_location(const struct shader_signature *signature) -+{ -+ unsigned int i, max_row; - -- if (current->sysval_semantic) -- have_sysval = true; -- } -+ if (!signature) -+ return 0; - -- if (count == 1) -- return false; -+ for (i = 0, max_row = 0; i < signature->element_count; ++i) -+ max_row = max(max_row, signature->elements[i].register_index + signature->elements[i].register_count); -+ return max_row; -+} - -- if (builtin 
|| have_sysval) -- return true; -+static unsigned int shader_register_get_io_indices(const struct vkd3d_shader_register *reg, -+ unsigned int *array_sizes) -+{ -+ unsigned int i, element_idx; - -- if (!vkd3d_bitmask_is_contiguous(write_mask)) -+ array_sizes[0] = 0; -+ array_sizes[1] = 0; -+ element_idx = reg->idx[0].offset; -+ for (i = 1; i < reg->idx_count; ++i) - { -- FIXME("Write mask %#x is non-contiguous.\n", write_mask); -- return true; -+ array_sizes[1] = array_sizes[0]; -+ array_sizes[0] = element_idx; -+ element_idx = reg->idx[i].offset; - } - -- assert(vkd3d_write_mask_component_count(write_mask) >= *component_count); -- *component_count = vkd3d_write_mask_component_count(write_mask); -- *out_write_mask = write_mask; -- return false; -+ return element_idx; - } - - static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, -@@ -4513,50 +4547,35 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, - enum vkd3d_shader_interpolation_mode interpolation_mode) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_signature_element *signature_element; -- const struct vkd3d_shader_signature *shader_signature; - const struct vkd3d_shader_register *reg = &dst->reg; - unsigned int component_idx, input_component_count; -+ const struct signature_element *signature_element; -+ const struct shader_signature *shader_signature; - enum vkd3d_shader_component_type component_type; - uint32_t type_id, ptr_type_id, float_type_id; - const struct vkd3d_spirv_builtin *builtin; -+ unsigned int write_mask, reg_write_mask; - struct vkd3d_symbol *symbol = NULL; - uint32_t val_id, input_id, var_id; - struct vkd3d_symbol reg_symbol; -- struct vkd3d_symbol tmp_symbol; - SpvStorageClass storage_class; - struct rb_entry *entry = NULL; - bool use_private_var = false; -- unsigned int write_mask; -- unsigned int array_size; -- unsigned int reg_idx; -+ unsigned int array_sizes[2]; -+ unsigned int element_idx; 
- uint32_t i, index; - -- assert(!reg->idx[0].rel_addr); -- assert(!reg->idx[1].rel_addr); -- -- if (reg->idx[1].offset != ~0u) -- { -- array_size = reg->idx[0].offset; -- reg_idx = reg->idx[1].offset; -- } -- else -- { -- array_size = 0; -- reg_idx = reg->idx[0].offset; -- } -+ assert(!reg->idx_count || !reg->idx[0].rel_addr); -+ assert(reg->idx_count < 2 || !reg->idx[1].rel_addr); - - shader_signature = reg->type == VKD3DSPR_PATCHCONST -- ? compiler->patch_constant_signature : compiler->input_signature; -+ ? &compiler->patch_constant_signature : &compiler->input_signature; - -- if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature, -- NULL, reg_idx, dst->write_mask))) -- { -- FIXME("No signature element for shader input, ignoring shader input.\n"); -- return 0; -- } -+ element_idx = shader_register_get_io_indices(reg, array_sizes); -+ signature_element = &shader_signature->elements[element_idx]; - -- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && !sysval && signature_element->sysval_semantic) -+ if ((compiler->shader_type == VKD3D_SHADER_TYPE_HULL || compiler->shader_type == VKD3D_SHADER_TYPE_GEOMETRY) -+ && !sysval && signature_element->sysval_semantic) - sysval = vkd3d_siv_from_sysval(signature_element->sysval_semantic); - - builtin = get_spirv_builtin_for_sysval(compiler, sysval); -@@ -4576,12 +4595,16 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, - component_idx = vkd3d_write_mask_get_component_idx(signature_element->mask); - } - -- if (needs_private_io_variable(shader_signature, reg_idx, builtin, &input_component_count, &write_mask) -- && (compiler->shader_type != VKD3D_SHADER_TYPE_HULL -- || (reg->type != VKD3DSPR_INCONTROLPOINT && reg->type != VKD3DSPR_PATCHCONST))) -+ if (needs_private_io_variable(builtin)) -+ { - use_private_var = true; -+ reg_write_mask = write_mask; -+ } - else -+ { - component_idx = vkd3d_write_mask_get_component_idx(write_mask); -+ reg_write_mask = write_mask >> 
component_idx; -+ } - - storage_class = SpvStorageClassInput; - -@@ -4589,111 +4612,68 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, - - if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) - { -+ /* Except for vicp there should be one declaration per signature element. Sources of -+ * duplicate declarations are: a single register split into multiple declarations having -+ * different components, which should have been merged, and declarations in one phase -+ * being repeated in another (i.e. vcp/vocp), which should have been deleted. */ -+ if (reg->type != VKD3DSPR_INPUT || !is_in_fork_or_join_phase(compiler)) -+ FIXME("Duplicate input definition found.\n"); - symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); -- input_id = symbol->id; -- } -- else if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL -- && (reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST)) -- { -- /* Input/output registers from one phase can be used as inputs in -- * subsequent phases. Specifically: -- * -- * - Control phase inputs are available as "vicp" in fork and join -- * phases. -- * - Control phase outputs are available as "vocp" in fork and join -- * phases. -- * - Fork phase patch constants are available as "vpc" in join -- * phases. -- * -- * We handle "vicp" and "vpc" here by creating aliases to the shader's -- * global inputs and outputs. We handle "vocp" in -- * spirv_compiler_leave_shader_phase(). 
*/ -- -- tmp_symbol = reg_symbol; -- if (reg->type == VKD3DSPR_PATCHCONST) -- tmp_symbol.key.reg.type = VKD3DSPR_OUTPUT; -- else -- tmp_symbol.key.reg.type = VKD3DSPR_INPUT; -- -- if ((entry = rb_get(&compiler->symbol_table, &tmp_symbol))) -- { -- symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); -- tmp_symbol = *symbol; -- tmp_symbol.key.reg.type = reg->type; -- spirv_compiler_put_symbol(compiler, &tmp_symbol); -- -- input_id = symbol->id; -- } -- else -- { -- if (reg->type == VKD3DSPR_PATCHCONST) -- ERR("Patch constant register %u was not declared in a previous phase.\n", reg_idx); -- else -- ERR("Input control point register %u was not declared in a previous phase.\n", reg_idx); -- } -+ return symbol->id; - } - -- if (!symbol || ~symbol->info.reg.dcl_mask & write_mask) -+ if (builtin) - { -- if (builtin) -- { -- input_id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size); -- if (reg->type == VKD3DSPR_PATCHCONST) -- vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); -- } -- else -- { -- unsigned int location = reg_idx; -- -- input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, -- storage_class, component_type, input_component_count, array_size); -- vkd3d_spirv_add_iface_variable(builder, input_id); -- if (reg->type == VKD3DSPR_PATCHCONST) -- { -- vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); -- location += compiler->input_signature->element_count; -- } -- vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location); -- if (component_idx) -- vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationComponent, component_idx); -- -- spirv_compiler_emit_interpolation_decorations(compiler, input_id, interpolation_mode); -- } -+ input_id = spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, array_sizes, 2); -+ if (reg->type == VKD3DSPR_PATCHCONST) -+ vkd3d_spirv_build_op_decorate(builder, 
input_id, SpvDecorationPatch, NULL, 0); - } -- -- if (!symbol) -+ else - { -- var_id = input_id; -- if (use_private_var) -+ unsigned int location = signature_element->target_location; -+ -+ input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, -+ storage_class, component_type, input_component_count, array_sizes, 2); -+ vkd3d_spirv_add_iface_variable(builder, input_id); -+ if (reg->type == VKD3DSPR_PATCHCONST) - { -- storage_class = SpvStorageClassPrivate; -- var_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, -- storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, array_size); -+ vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); -+ location += shader_signature_next_location(&compiler->input_signature); - } -+ vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location); -+ if (component_idx) -+ vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationComponent, component_idx); - -- vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, -- use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, -- use_private_var ? VKD3DSP_WRITEMASK_ALL : write_mask); -- reg_symbol.info.reg.dcl_mask |= write_mask; -- spirv_compiler_put_symbol(compiler, ®_symbol); -- -- spirv_compiler_emit_register_debug_name(builder, var_id, reg); -+ spirv_compiler_emit_interpolation_decorations(compiler, input_id, interpolation_mode); - } -- else -+ -+ var_id = input_id; -+ if (use_private_var) - { -- symbol->info.reg.dcl_mask |= write_mask; -+ storage_class = SpvStorageClassPrivate; -+ var_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, -+ storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, array_sizes, 2); - } - -+ vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, -+ use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, -+ use_private_var ? 
VKD3DSP_WRITEMASK_ALL : reg_write_mask); -+ reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; -+ assert(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); -+ spirv_compiler_put_symbol(compiler, ®_symbol); -+ -+ spirv_compiler_emit_register_debug_name(builder, var_id, reg); -+ - if (use_private_var) - { - type_id = vkd3d_spirv_get_type_id(builder, component_type, input_component_count); -- for (i = 0; i < max(array_size, 1); ++i) -+ for (i = 0; i < max(array_sizes[0], 1); ++i) - { - struct vkd3d_shader_register dst_reg = *reg; - dst_reg.data_type = VKD3D_DATA_FLOAT; - - val_id = input_id; -- if (array_size) -+ if (array_sizes[0]) - { - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); - index = spirv_compiler_get_constant_uint(compiler, i); -@@ -4708,7 +4688,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); - index = spirv_compiler_get_constant_uint(compiler, builtin->member_idx); - val_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, ptr_type_id, input_id, index); -- dst_reg.idx[0].offset = reg_idx + i; -+ dst_reg.idx[0].offset = element_idx + i; - } - val_id = vkd3d_spirv_build_op_load(builder, type_id, val_id, SpvMemoryAccessMaskNone); - -@@ -4743,9 +4723,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, - uint32_t write_mask; - uint32_t input_id; - -- assert(!reg->idx[0].rel_addr); -- assert(!reg->idx[1].rel_addr); -- assert(reg->idx[1].offset == ~0u); -+ assert(!reg->idx_count || !reg->idx[0].rel_addr); -+ assert(reg->idx_count < 2); - - if (!(builtin = get_spirv_builtin_for_register(reg->type))) - { -@@ -4763,19 +4742,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, - write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); - 
vkd3d_symbol_set_register_info(®_symbol, input_id, - SpvStorageClassInput, builtin->component_type, write_mask); -- reg_symbol.info.reg.dcl_mask = write_mask; - reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; - spirv_compiler_put_symbol(compiler, ®_symbol); - spirv_compiler_emit_register_debug_name(builder, input_id, reg); - } - - static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compiler, -- const struct vkd3d_shader_phase *phase, const struct vkd3d_shader_dst_param *dst) -+ const struct vkd3d_shader_dst_param *dst) - { -- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_register *reg = &dst->reg; -- struct vkd3d_symbol reg_symbol; -- uint32_t val_id; - - switch (reg->type) - { -@@ -4787,10 +4762,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil - case VKD3DSPR_PRIMID: - spirv_compiler_emit_input_register(compiler, dst); - return; -- case VKD3DSPR_FORKINSTID: -- case VKD3DSPR_JOININSTID: -- val_id = phase->instance_id; -- break; - case VKD3DSPR_OUTPOINTID: /* Emitted in spirv_compiler_emit_initial_declarations(). */ - case VKD3DSPR_OUTCONTROLPOINT: /* See spirv_compiler_leave_shader_phase(). 
*/ - return; -@@ -4798,22 +4769,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil - FIXME("Unhandled shader phase input register %#x.\n", reg->type); - return; - } -- -- vkd3d_symbol_make_register(®_symbol, reg); -- vkd3d_symbol_set_register_info(®_symbol, val_id, -- SpvStorageClassMax /* Intermediate value */, -- VKD3D_SHADER_COMPONENT_UINT, VKD3DSP_WRITEMASK_0); -- spirv_compiler_put_symbol(compiler, ®_symbol); -- spirv_compiler_emit_register_debug_name(builder, val_id, reg); --} -- --static unsigned int spirv_compiler_get_output_variable_index( -- struct spirv_compiler *compiler, unsigned int register_idx) --{ -- if (register_idx == ~0u) /* oDepth */ -- return ARRAY_SIZE(compiler->private_output_variable) - 1; -- assert(register_idx < ARRAY_SIZE(compiler->private_output_variable) - 1); -- return register_idx; - } - - static unsigned int get_shader_output_swizzle(const struct spirv_compiler *compiler, -@@ -4835,58 +4790,32 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) - return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && info && info->dual_source_blending; - } - --static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signature_element *e, -- uint32_t *mask) -+static void calculate_clip_or_cull_distance_mask(const struct signature_element *e, uint32_t *mask) - { -+ unsigned int write_mask; -+ - if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) - { - FIXME("Invalid semantic index %u for clip/cull distance.\n", e->semantic_index); - return; - } - -- *mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); -+ write_mask = e->mask >> vkd3d_write_mask_get_component_idx(e->mask); -+ *mask |= (write_mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); - } - --static uint32_t calculate_sysval_array_mask(struct spirv_compiler *compiler, -- const struct vkd3d_shader_signature *signature, enum 
vkd3d_shader_input_sysval_semantic sysval) -+/* Emits arrayed SPIR-V built-in variables. */ -+static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) - { -- const struct vkd3d_shader_signature_element *e; -- const struct vkd3d_spirv_builtin *sig_builtin; -+ const struct shader_signature *output_signature = &compiler->output_signature; -+ uint32_t clip_distance_mask = 0, clip_distance_id = 0; -+ uint32_t cull_distance_mask = 0, cull_distance_id = 0; - const struct vkd3d_spirv_builtin *builtin; -- uint32_t signature_idx, mask = 0; -- -- if (!(builtin = get_spirv_builtin_for_sysval(compiler, sysval))) -- { -- FIXME("Unhandled sysval %#x.\n", sysval); -- return 0; -- } -+ unsigned int i, count; - -- for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) -+ for (i = 0; i < output_signature->element_count; ++i) - { -- e = &signature->elements[signature_idx]; -- -- sig_builtin = get_spirv_builtin_for_sysval(compiler, -- vkd3d_siv_from_sysval_indexed(e->sysval_semantic, e->semantic_index)); -- -- if (sig_builtin && sig_builtin->spirv_builtin == builtin->spirv_builtin) -- mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * sig_builtin->member_idx); -- } -- -- return mask; --} -- --/* Emits arrayed SPIR-V built-in variables. 
*/ --static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) --{ -- const struct vkd3d_shader_signature *output_signature = compiler->output_signature; -- uint32_t clip_distance_mask = 0, clip_distance_id = 0; -- uint32_t cull_distance_mask = 0, cull_distance_id = 0; -- const struct vkd3d_spirv_builtin *builtin; -- unsigned int i, count; -- -- for (i = 0; i < output_signature->element_count; ++i) -- { -- const struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; -+ const struct signature_element *e = &output_signature->elements[i]; - - switch (e->sysval_semantic) - { -@@ -4921,7 +4850,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * - - for (i = 0; i < output_signature->element_count; ++i) - { -- const struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; -+ const struct signature_element *e = &output_signature->elements[i]; - - switch (e->sysval_semantic) - { -@@ -4953,9 +4882,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, - uint32_t write_mask; - uint32_t output_id; - -- assert(!reg->idx[0].rel_addr); -- assert(!reg->idx[1].rel_addr); -- assert(reg->idx[1].offset == ~0u); -+ assert(!reg->idx_count || !reg->idx[0].rel_addr); -+ assert(reg->idx_count < 2); - - if (!(builtin = get_spirv_builtin_for_register(reg->type))) - { -@@ -4969,7 +4897,6 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, - write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); - vkd3d_symbol_set_register_info(®_symbol, output_id, - SpvStorageClassOutput, builtin->component_type, write_mask); -- reg_symbol.info.reg.dcl_mask = write_mask; - reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; - spirv_compiler_put_symbol(compiler, ®_symbol); - spirv_compiler_emit_register_execution_mode(compiler, reg); -@@ -4977,7 +4904,7 @@ static void spirv_compiler_emit_output_register(struct 
spirv_compiler *compiler, - } - - static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler, -- const struct vkd3d_shader_phase *phase, const struct vkd3d_spirv_builtin *builtin) -+ const struct vkd3d_spirv_builtin *builtin) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t *variable_id, id; -@@ -4993,7 +4920,7 @@ static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_c - return *variable_id; - - id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0); -- if (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE) -+ if (is_in_fork_or_join_phase(compiler)) - vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); - - if (variable_id) -@@ -5005,50 +4932,39 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_input_sysval_semantic sysval) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_signature_element *signature_element; -- const struct vkd3d_shader_signature *shader_signature; - const struct vkd3d_shader_register *reg = &dst->reg; - unsigned int component_idx, output_component_count; -+ const struct signature_element *signature_element; - enum vkd3d_shader_component_type component_type; -+ const struct shader_signature *shader_signature; - const struct vkd3d_spirv_builtin *builtin; -- const struct vkd3d_shader_phase *phase; -- struct vkd3d_symbol *symbol = NULL; -+ unsigned int write_mask, reg_write_mask; - bool use_private_variable = false; - struct vkd3d_symbol reg_symbol; - SpvStorageClass storage_class; -- struct rb_entry *entry = NULL; -- unsigned int signature_idx; -- unsigned int write_mask; -- unsigned int array_size; -+ unsigned int array_sizes[2]; -+ unsigned int element_idx; - bool is_patch_constant; - uint32_t id, var_id; - -- phase = 
spirv_compiler_get_current_shader_phase(compiler); -- is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE); -- -- shader_signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; -+ is_patch_constant = is_in_fork_or_join_phase(compiler); - -- array_size = is_control_point_phase(phase) ? compiler->output_control_point_count : 0; -+ shader_signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature; - -- if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature, -- &signature_idx, reg->idx[0].offset, dst->write_mask))) -- { -- FIXME("No signature element for shader output, ignoring shader output.\n"); -- return; -- } -+ element_idx = shader_register_get_io_indices(reg, array_sizes); -+ signature_element = &shader_signature->elements[element_idx]; - - builtin = vkd3d_get_spirv_builtin(compiler, dst->reg.type, sysval); - - write_mask = signature_element->mask; - -- component_idx = vkd3d_write_mask_get_component_idx(dst->write_mask); -- output_component_count = vkd3d_write_mask_component_count(signature_element->mask); -+ component_idx = vkd3d_write_mask_get_component_idx(write_mask); -+ output_component_count = vkd3d_write_mask_component_count(write_mask); - if (builtin) - { - component_type = builtin->component_type; - if (!builtin->spirv_array_size) - output_component_count = builtin->component_count; -- component_idx = 0; - } - else - { -@@ -5058,128 +4974,104 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - storage_class = SpvStorageClassOutput; - - if (get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE -- || needs_private_io_variable(shader_signature, signature_element->register_index, -- builtin, &output_component_count, &write_mask) -- || is_patch_constant) -+ || (compiler->output_info[element_idx].id && 
compiler->output_info[element_idx].array_element_mask) -+ || needs_private_io_variable(builtin)) -+ { - use_private_variable = true; -- else -- component_idx = vkd3d_write_mask_get_component_idx(write_mask); -+ } - -+ reg_write_mask = write_mask >> component_idx; - vkd3d_symbol_make_register(®_symbol, reg); - -- if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) -+ if (rb_get(&compiler->symbol_table, ®_symbol)) - { -- symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); -- id = symbol->id; -+ /* See spirv_compiler_emit_input() for possible causes. */ -+ FIXME("Duplicate output definition found.\n"); -+ return; - } - -- if (!symbol || ~symbol->info.reg.dcl_mask & write_mask) -+ if (compiler->output_info[element_idx].id) - { -- if (compiler->output_info[signature_idx].id) -- { -- id = compiler->output_info[signature_idx].id; -- if (compiler->output_info[signature_idx].array_element_mask) -- use_private_variable = true; -- } -- else if (builtin) -- { -- if (phase) -- id = spirv_compiler_emit_shader_phase_builtin_variable(compiler, phase, builtin); -- else -- id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size); -- -- if (builtin->spirv_array_size) -- compiler->output_info[signature_idx].array_element_mask = -- calculate_sysval_array_mask(compiler, shader_signature, sysval); -- -- spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); -- } -+ id = compiler->output_info[element_idx].id; -+ } -+ else if (builtin) -+ { -+ if (spirv_compiler_get_current_shader_phase(compiler)) -+ id = spirv_compiler_emit_shader_phase_builtin_variable(compiler, builtin); - else -- { -- unsigned int location = reg->idx[0].offset; -- -- if (is_patch_constant) -- location += compiler->output_signature->element_count; -- -- id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, -- storage_class, component_type, output_component_count, array_size); -- vkd3d_spirv_add_iface_variable(builder, id); -- -- if 
(is_dual_source_blending(compiler) && reg->idx[0].offset < 2) -- { -- vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); -- vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, reg->idx[0].offset); -- } -- else -- { -- vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, location); -- } -+ id = spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, array_sizes, 2); - -- if (component_idx) -- vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationComponent, component_idx); -- } -- -- if (is_patch_constant) -- vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); -- -- spirv_compiler_decorate_xfb_output(compiler, id, output_component_count, signature_element); -- -- compiler->output_info[signature_idx].id = id; -- compiler->output_info[signature_idx].component_type = component_type; -+ spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); - } -- -- if (!symbol) -+ else if (signature_element->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ { -+ storage_class = SpvStorageClassPrivate; -+ id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, -+ storage_class, component_type, output_component_count, array_sizes, 2); -+ } -+ else - { -- var_id = id; -- if (use_private_variable) -- storage_class = SpvStorageClassPrivate; -+ unsigned int location = signature_element->target_location; -+ - if (is_patch_constant) -- var_id = compiler->hs.patch_constants_id; -- else if (use_private_variable) -- var_id = spirv_compiler_emit_variable(compiler, &builder->global_stream, -- storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); -+ location += shader_signature_next_location(&compiler->output_signature); - -- vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, -- use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, -- use_private_variable ? 
VKD3DSP_WRITEMASK_ALL : write_mask); -- reg_symbol.info.reg.is_aggregate = use_private_variable ? is_patch_constant : array_size; -- if (!use_private_variable && is_control_point_phase(phase)) -+ id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, -+ storage_class, component_type, output_component_count, array_sizes, 2); -+ vkd3d_spirv_add_iface_variable(builder, id); -+ -+ if (is_dual_source_blending(compiler) && location < 2) - { -- reg_symbol.info.reg.member_idx = spirv_compiler_get_invocation_id(compiler); -- reg_symbol.info.reg.is_dynamically_indexed = true; -+ vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); -+ vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, location); - } -- else if (is_patch_constant) -+ else - { -- reg_symbol.info.reg.member_idx = reg->idx[0].offset; -+ vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, location); - } -- reg_symbol.info.reg.dcl_mask = write_mask; -- -- spirv_compiler_put_symbol(compiler, ®_symbol); - -- if (!is_patch_constant) -- spirv_compiler_emit_register_debug_name(builder, var_id, reg); -+ if (component_idx) -+ vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationComponent, component_idx); - } -- else -+ -+ if (is_patch_constant) -+ vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); -+ -+ spirv_compiler_decorate_xfb_output(compiler, id, output_component_count, signature_element); -+ -+ compiler->output_info[element_idx].id = id; -+ compiler->output_info[element_idx].component_type = component_type; -+ -+ var_id = id; -+ if (use_private_variable) - { -- symbol->info.reg.dcl_mask |= write_mask; -- var_id = symbol->id; -+ storage_class = SpvStorageClassPrivate; -+ var_id = spirv_compiler_emit_variable(compiler, &builder->global_stream, -+ storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); - } - -+ vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, -+ use_private_variable ? 
VKD3D_SHADER_COMPONENT_FLOAT : component_type, -+ use_private_variable ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); -+ reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; -+ assert(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); -+ -+ spirv_compiler_put_symbol(compiler, ®_symbol); -+ -+ if (!is_patch_constant) -+ spirv_compiler_emit_register_debug_name(builder, var_id, reg); -+ - if (use_private_variable) - { -- unsigned int idx = spirv_compiler_get_output_variable_index(compiler, reg->idx[0].offset); -- compiler->private_output_variable[idx] = var_id; -- compiler->private_output_variable_write_mask[idx] |= dst->write_mask; -- if (is_patch_constant) -- compiler->private_output_variable_array_idx[idx] = spirv_compiler_get_constant_uint( -- compiler, reg->idx[0].offset); -+ compiler->private_output_variable[element_idx] = var_id; -+ compiler->private_output_variable_write_mask[element_idx] |= reg_write_mask; - if (!compiler->epilogue_function_id) - compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder); - } - } - - static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *compiler, -- const struct vkd3d_shader_signature_element *e) -+ const struct signature_element *e) - { - enum vkd3d_shader_input_sysval_semantic sysval; - const struct vkd3d_spirv_builtin *builtin; -@@ -5198,14 +5090,14 @@ static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *com - } - - static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compiler, -- const struct vkd3d_shader_signature *signature, const struct vkd3d_shader_signature_element *output, -+ const struct shader_signature *signature, const struct signature_element *output, - const struct vkd3d_shader_output_info *output_info, - uint32_t output_index_id, uint32_t val_id, unsigned int write_mask) - { - unsigned int dst_write_mask, use_mask, uninit_mask, swizzle, mask; - struct vkd3d_spirv_builder 
*builder = &compiler->spirv_builder; - uint32_t type_id, zero_id, ptr_type_id, chain_id, object_id; -- const struct vkd3d_shader_signature_element *element; -+ const struct signature_element *element; - unsigned int i, index, array_idx; - uint32_t output_id; - -@@ -5224,6 +5116,9 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi - use_mask |= element->used_mask; - } - } -+ index = vkd3d_write_mask_get_component_idx(output->mask); -+ dst_write_mask >>= index; -+ use_mask >>= index; - write_mask &= dst_write_mask; - - if (!write_mask) -@@ -5294,22 +5189,19 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * - uint32_t param_type_id[MAX_REG_OUTPUT + 1], param_id[MAX_REG_OUTPUT + 1] = {0}; - uint32_t void_id, type_id, ptr_type_id, function_type_id, function_id; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_signature *signature; -- const struct vkd3d_shader_phase *phase; -+ const struct shader_signature *signature; - uint32_t output_index_id = 0; - bool is_patch_constant; - unsigned int i, count; -- DWORD variable_idx; - - STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(param_id)); - STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(param_type_id)); - STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(compiler->private_output_variable_array_idx)); - STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(compiler->private_output_variable_write_mask)); - -- phase = spirv_compiler_get_current_shader_phase(compiler); -- is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE); -+ is_patch_constant = is_in_fork_or_join_phase(compiler); - -- signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; -+ signature = is_patch_constant ? 
&compiler->patch_constant_signature : &compiler->output_signature; - - function_id = compiler->epilogue_function_id; - -@@ -5340,7 +5232,7 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * - param_id[i] = vkd3d_spirv_build_op_load(builder, type_id, param_id[i], SpvMemoryAccessMaskNone); - } - -- if (is_control_point_phase(phase)) -+ if (is_in_control_point_phase(compiler)) - output_index_id = spirv_compiler_emit_load_invocation_id(compiler); - - for (i = 0; i < signature->element_count; ++i) -@@ -5348,14 +5240,12 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * - if (!compiler->output_info[i].id) - continue; - -- variable_idx = spirv_compiler_get_output_variable_index(compiler, -- signature->elements[i].register_index); -- if (!param_id[variable_idx]) -+ if (!param_id[i]) - continue; - - spirv_compiler_emit_store_shader_output(compiler, signature, - &signature->elements[i], &compiler->output_info[i], output_index_id, -- param_id[variable_idx], compiler->private_output_variable_write_mask[variable_idx]); -+ param_id[i], compiler->private_output_variable_write_mask[i]); - } - - vkd3d_spirv_build_op_return(&compiler->spirv_builder); -@@ -5375,28 +5265,11 @@ static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *comp - dst.reg.type = VKD3DSPR_OUTPOINTID; - dst.reg.idx[0].offset = ~0u; - dst.reg.idx[1].offset = ~0u; -+ dst.reg.idx_count = 0; - dst.write_mask = VKD3DSP_WRITEMASK_0; - spirv_compiler_emit_input_register(compiler, &dst); - } - --static void spirv_compiler_emit_hull_shader_patch_constants(struct spirv_compiler *compiler) --{ -- const struct vkd3d_shader_signature *signature = compiler->patch_constant_signature; -- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- uint32_t register_count = 0; -- unsigned int signature_idx; -- -- for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) -- register_count = max(register_count, 
signature->elements[signature_idx].register_index + 1); -- -- if (!register_count) -- return; -- -- compiler->hs.patch_constants_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, -- SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, register_count); -- vkd3d_spirv_build_op_name(builder, compiler->hs.patch_constants_id, "opc"); --} -- - static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler) - { - const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; -@@ -5410,7 +5283,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp - case VKD3D_SHADER_TYPE_HULL: - vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationControl); - spirv_compiler_emit_hull_shader_builtins(compiler); -- spirv_compiler_emit_hull_shader_patch_constants(compiler); - break; - case VKD3D_SHADER_TYPE_DOMAIN: - vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation); -@@ -5439,8 +5311,7 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp - if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) - { - vkd3d_spirv_builder_begin_main_function(builder); -- -- spirv_compiler_emit_shader_signature_outputs(compiler); -+ compiler->main_block_open = true; - } - } - -@@ -5478,8 +5349,7 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler - WARN("Unhandled global flags %#x.\n", flags); - } - --static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t count) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - size_t function_location; -@@ -5490,11 +5360,11 @@ static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, - vkd3d_spirv_begin_function_stream_insertion(builder, function_location); - - 
assert(!compiler->temp_count); -- compiler->temp_count = instruction->declaration.count; -+ compiler->temp_count = count; - for (i = 0; i < compiler->temp_count; ++i) - { -- id = spirv_compiler_emit_variable(compiler, &builder->function_stream, -- SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); -+ id = spirv_compiler_emit_variable(compiler, &builder->global_stream, -+ SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); - if (!i) - compiler->temp_id = id; - assert(id == compiler->temp_id + i); -@@ -5522,12 +5392,13 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil - reg.type = VKD3DSPR_IDXTEMP; - reg.idx[0].offset = temp->register_idx; - reg.idx[1].offset = ~0u; -+ reg.idx_count = 1; - - function_location = spirv_compiler_get_current_function_location(compiler); - vkd3d_spirv_begin_function_stream_insertion(builder, function_location); - - id = spirv_compiler_emit_array_variable(compiler, &builder->function_stream, -- SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, temp->register_size); -+ SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, &temp->register_size, 1); - - spirv_compiler_emit_register_debug_name(builder, id, ®); - -@@ -5692,50 +5563,55 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * - return var_id; - } - --static void spirv_compiler_emit_dcl_constant_buffer(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_register_range *range, unsigned int register_id, unsigned int size_in_bytes) - { -- const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; - const SpvStorageClass storage_class = 
SpvStorageClassUniform; -- const struct vkd3d_shader_register *reg = &cb->src.reg; - struct vkd3d_push_constant_buffer_binding *push_cb; - struct vkd3d_descriptor_variable_info var_info; - struct vkd3d_symbol reg_symbol; -+ unsigned int size; -+ -+ struct vkd3d_shader_register reg = -+ { -+ .type = VKD3DSPR_CONSTBUFFER, -+ .idx[0].offset = register_id, -+ .idx_count = 1, -+ }; - -- assert(!(instruction->flags & ~VKD3DSI_INDEXED_DYNAMIC)); -+ size = size_in_bytes / (VKD3D_VEC4_SIZE * sizeof(uint32_t)); - -- if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, cb))) -+ if ((push_cb = spirv_compiler_find_push_constant_buffer(compiler, range))) - { - /* Push constant buffers are handled in - * spirv_compiler_emit_push_constant_buffers(). - */ -- unsigned int cb_size_in_bytes = cb->size * VKD3D_VEC4_SIZE * sizeof(uint32_t); -- push_cb->reg = *reg; -- push_cb->size = cb->size; -- if (cb_size_in_bytes > push_cb->pc.size) -+ push_cb->reg = reg; -+ push_cb->size = size; -+ if (size_in_bytes > push_cb->pc.size) - { - WARN("Constant buffer size %u exceeds push constant size %u.\n", -- cb_size_in_bytes, push_cb->pc.size); -+ size_in_bytes, push_cb->pc.size); - } - return; - } - - vec4_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); -- length_id = spirv_compiler_get_constant_uint(compiler, cb->size); -+ length_id = spirv_compiler_get_constant_uint(compiler, size); - array_type_id = vkd3d_spirv_build_op_type_array(builder, vec4_id, length_id); - vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, 16); - - struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1); - vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); - vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, SpvDecorationOffset, 0); -- vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", cb->size); -+ vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size); - - var_id = 
spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, -- reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); -+ ®, range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); - -- vkd3d_symbol_make_register(®_symbol, reg); -+ vkd3d_symbol_make_register(®_symbol, ®); - vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, - VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); - reg_symbol.descriptor_array = var_info.array_symbol; -@@ -5776,29 +5652,34 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi - spirv_compiler_put_symbol(compiler, ®_symbol); - } - --static void spirv_compiler_emit_dcl_sampler(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_register_range *range, unsigned int register_id) - { -- const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; - const SpvStorageClass storage_class = SpvStorageClassUniformConstant; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_register *reg = &sampler->src.reg; - struct vkd3d_descriptor_variable_info var_info; - struct vkd3d_symbol reg_symbol; - uint32_t type_id, var_id; - -- vkd3d_symbol_make_sampler(®_symbol, reg); -- reg_symbol.info.sampler.range = sampler->range; -+ const struct vkd3d_shader_register reg = -+ { -+ .type = VKD3DSPR_SAMPLER, -+ .idx[0].offset = register_id, -+ .idx_count = 1, -+ }; -+ -+ vkd3d_symbol_make_sampler(®_symbol, ®); -+ reg_symbol.info.sampler.range = *range; - spirv_compiler_put_symbol(compiler, ®_symbol); - -- if (spirv_compiler_has_combined_sampler(compiler, NULL, sampler)) -+ if (spirv_compiler_has_combined_sampler_for_sampler(compiler, range)) - return; - - type_id = vkd3d_spirv_get_op_type_sampler(builder); -- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, 
type_id, reg, -- &sampler->range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); -+ var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, -+ range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); - -- vkd3d_symbol_make_register(®_symbol, reg); -+ vkd3d_symbol_make_register(®_symbol, ®); - vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, - VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); - reg_symbol.descriptor_array = var_info.array_symbol; -@@ -5843,13 +5724,13 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty - } - } - --static const struct vkd3d_shader_descriptor_info *spirv_compiler_get_descriptor_info( -+static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( - struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, - const struct vkd3d_shader_register_range *range) - { -- const struct vkd3d_shader_scan_descriptor_info *descriptor_info = compiler->scan_descriptor_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; - unsigned int register_last = (range->last == ~0u) ? 
range->first : range->last; -- const struct vkd3d_shader_descriptor_info *d; -+ const struct vkd3d_shader_descriptor_info1 *d; - unsigned int i; - - for (i = 0; i < descriptor_info->descriptor_count; ++i) -@@ -5869,7 +5750,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler - bool raw_structured, uint32_t depth) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_descriptor_info *d; -+ const struct vkd3d_shader_descriptor_info1 *d; - bool uav_read, uav_atomics; - uint32_t sampled_type_id; - SpvImageFormat format; -@@ -5904,7 +5785,7 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi - const struct vkd3d_shader_combined_resource_sampler *current; - uint32_t image_type_id, type_id, ptr_type_id, var_id; - enum vkd3d_shader_binding_flag resource_type_flag; -- const struct vkd3d_shader_descriptor_info *d; -+ const struct vkd3d_shader_descriptor_info1 *d; - struct vkd3d_symbol symbol; - unsigned int i; - bool depth; -@@ -5980,20 +5861,30 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi - } - - static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, -- const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, -- enum vkd3d_data_type resource_data_type, unsigned int structure_stride, bool raw) -+ const struct vkd3d_shader_register_range *range, unsigned int register_id, -+ unsigned int sample_count, bool is_uav, enum vkd3d_shader_resource_type resource_type, -+ enum vkd3d_shader_resource_data_type resource_data_type, unsigned int structure_stride, bool raw) - { - struct vkd3d_descriptor_variable_info var_info, counter_var_info = {0}; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - SpvStorageClass storage_class = SpvStorageClassUniformConstant; - uint32_t counter_type_id, type_id, var_id, counter_var_id = 0; -- const struct 
vkd3d_shader_register *reg = &resource->reg.reg; - const struct vkd3d_spirv_resource_type *resource_type_info; - enum vkd3d_shader_component_type sampled_type; - struct vkd3d_symbol resource_symbol; -- bool is_uav; - -- is_uav = reg->type == VKD3DSPR_UAV; -+ struct vkd3d_shader_register reg = -+ { -+ .type = is_uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, -+ .idx[0].offset = register_id, -+ .idx_count = 1, -+ }; -+ -+ if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && sample_count == 1) -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; -+ - if (!(resource_type_info = spirv_compiler_enable_resource_type(compiler, - resource_type, is_uav))) - { -@@ -6001,11 +5892,11 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - return; - } - -- sampled_type = vkd3d_component_type_from_data_type(resource_data_type); -+ sampled_type = vkd3d_component_type_from_resource_data_type(resource_data_type); - -- if (spirv_compiler_has_combined_sampler(compiler, resource, NULL)) -+ if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) - { -- spirv_compiler_emit_combined_sampler_declarations(compiler, reg, &resource->range, -+ spirv_compiler_emit_combined_sampler_declarations(compiler, ®, range, - resource_type, sampled_type, structure_stride, raw, resource_type_info); - return; - } -@@ -6028,19 +5919,18 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - } - else - { -- type_id = spirv_compiler_get_image_type_id(compiler, reg, &resource->range, -+ type_id = spirv_compiler_get_image_type_id(compiler, ®, range, - resource_type_info, sampled_type, structure_stride || raw, 0); - } - -- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, -- &resource->range, resource_type, false, &var_info); -+ var_id = 
spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, -+ range, resource_type, false, &var_info); - - if (is_uav) - { -- const struct vkd3d_shader_descriptor_info *d; -+ const struct vkd3d_shader_descriptor_info1 *d; - -- d = spirv_compiler_get_descriptor_info(compiler, -- VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &resource->range); -+ d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); - - if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) - vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); -@@ -6072,15 +5962,15 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - type_id = struct_id; - } - -- counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, reg, -- &resource->range, resource_type, true, &counter_var_info); -+ counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, -+ type_id, ®, range, resource_type, true, &counter_var_info); - } - } - -- vkd3d_symbol_make_resource(&resource_symbol, reg); -+ vkd3d_symbol_make_resource(&resource_symbol, ®); - resource_symbol.id = var_id; - resource_symbol.descriptor_array = var_info.array_symbol; -- resource_symbol.info.resource.range = resource->range; -+ resource_symbol.info.resource.range = *range; - resource_symbol.info.resource.sampled_type = sampled_type; - resource_symbol.info.resource.type_id = type_id; - resource_symbol.info.resource.resource_type_info = resource_type_info; -@@ -6093,52 +5983,6 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - spirv_compiler_put_symbol(compiler, &resource_symbol); - } - --static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) --{ -- const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; -- uint32_t flags = instruction->flags; -- -- /* We don't 
distinguish between APPEND and COUNTER UAVs. */ -- flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; -- if (flags) -- FIXME("Unhandled UAV flags %#x.\n", flags); -- -- spirv_compiler_emit_resource_declaration(compiler, &semantic->resource, -- semantic->resource_type, semantic->resource_data_type[0], 0, false); --} -- --static void spirv_compiler_emit_dcl_resource_raw(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) --{ -- const struct vkd3d_shader_raw_resource *resource = &instruction->declaration.raw_resource; -- uint32_t flags = instruction->flags; -- -- /* We don't distinguish between APPEND and COUNTER UAVs. */ -- flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; -- if (flags) -- FIXME("Unhandled UAV flags %#x.\n", flags); -- -- spirv_compiler_emit_resource_declaration(compiler, &resource->resource, -- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, 0, true); --} -- --static void spirv_compiler_emit_dcl_resource_structured(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) --{ -- const struct vkd3d_shader_structured_resource *resource = &instruction->declaration.structured_resource; -- unsigned int stride = resource->byte_stride; -- uint32_t flags = instruction->flags; -- -- /* We don't distinguish between APPEND and COUNTER UAVs. 
*/ -- flags &= ~VKD3DSUF_ORDER_PRESERVING_COUNTER; -- if (flags) -- FIXME("Unhandled UAV flags %#x.\n", flags); -- -- spirv_compiler_emit_resource_declaration(compiler, &resource->resource, -- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_DATA_UINT, stride / 4, false); --} -- - static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) - { -@@ -6185,10 +6029,9 @@ static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) - { - const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; -- const struct vkd3d_shader_phase *phase; - -- if ((phase = spirv_compiler_get_current_shader_phase(compiler))) -- spirv_compiler_emit_shader_phase_input(compiler, phase, dst); -+ if (spirv_compiler_get_current_shader_phase(compiler)) -+ spirv_compiler_emit_shader_phase_input(compiler, dst); - else if (vkd3d_shader_register_is_input(&dst->reg) || dst->reg.type == VKD3DSPR_PATCHCONST) - spirv_compiler_emit_input(compiler, dst, VKD3D_SIV_NONE, VKD3DSIM_NONE); - else -@@ -6224,7 +6067,8 @@ static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler, - { - const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; - -- if (vkd3d_shader_register_is_output(&dst->reg)) -+ if (vkd3d_shader_register_is_output(&dst->reg) -+ || (is_in_fork_or_join_phase(compiler) && vkd3d_shader_register_is_patch_constant(&dst->reg))) - spirv_compiler_emit_output(compiler, dst, VKD3D_SIV_NONE); - else - spirv_compiler_emit_output_register(compiler, dst); -@@ -6242,64 +6086,6 @@ static void spirv_compiler_emit_dcl_output_siv(struct spirv_compiler *compiler, - spirv_compiler_emit_output(compiler, dst, sysval); - } - --static bool spirv_compiler_check_index_range(struct spirv_compiler *compiler, -- const struct vkd3d_shader_index_range *range) --{ -- const struct vkd3d_shader_register *reg = 
&range->dst.reg; -- struct vkd3d_shader_register_info reg_info; -- struct vkd3d_shader_register current_reg; -- struct vkd3d_symbol reg_symbol; -- unsigned int i; -- uint32_t id; -- -- current_reg = *reg; -- vkd3d_symbol_make_register(®_symbol, ¤t_reg); -- if (!spirv_compiler_get_register_info(compiler, ¤t_reg, ®_info)) -- { -- ERR("Failed to get register info.\n"); -- return false; -- } -- -- /* FIXME: We should check if it's an array. */ -- if (!reg_info.is_aggregate) -- { -- FIXME("Unhandled register %#x.\n", reg->type); -- return false; -- } -- id = reg_info.id; -- -- for (i = reg->idx[0].offset; i < reg->idx[0].offset + range->register_count; ++i) -- { -- current_reg.idx[0].offset = i; -- vkd3d_symbol_make_register(®_symbol, ¤t_reg); -- -- if (range->dst.write_mask != reg_info.write_mask -- || vkd3d_write_mask_component_count(reg_info.write_mask) != 1) -- { -- FIXME("Unhandled index range write mask %#x (%#x).\n", -- range->dst.write_mask, reg_info.write_mask); -- return false; -- } -- -- if (reg_info.id != id) -- { -- FIXME("Unhandled index range %#x, %u.\n", reg->type, i); -- return false; -- } -- } -- -- return true; --} -- --static void spirv_compiler_emit_dcl_index_range(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) --{ -- const struct vkd3d_shader_index_range *range = &instruction->declaration.index_range; -- -- if (!spirv_compiler_check_index_range(compiler, range)) -- FIXME("Ignoring dcl_index_range %#x %u.\n", range->dst.reg.type, range->register_count); --} -- - static void spirv_compiler_emit_dcl_stream(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) - { -@@ -6495,157 +6281,80 @@ static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler - SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); - } - --static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler, -- const struct vkd3d_shader_phase *phase) -+static void 
spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler); -+ -+static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) - { -- const struct vkd3d_shader_signature *signature = compiler->output_signature; -+ const struct shader_signature *signature = &compiler->output_signature; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- struct vkd3d_symbol reg_symbol, *symbol; -- struct vkd3d_shader_register reg; -- struct rb_entry *entry; -- unsigned int i; - -- vkd3d_spirv_build_op_function_end(builder); -+ if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase) -+ spirv_compiler_emit_default_control_point_phase(compiler); - -- compiler->temp_id = 0; -- compiler->temp_count = 0; -+ vkd3d_spirv_build_op_function_end(builder); - -- /* -- * vocp inputs in fork and join shader phases are outputs of the control -- * point phase. Reinsert symbols for vocp registers while leaving the -- * control point phase. -- */ -- if (is_control_point_phase(phase)) -+ if (is_in_control_point_phase(compiler)) - { - if (compiler->epilogue_function_id) - { -- spirv_compiler_emit_shader_phase_name(compiler, compiler->epilogue_function_id, phase, "_epilogue"); -+ spirv_compiler_emit_shader_phase_name(compiler, compiler->epilogue_function_id, "_epilogue"); - spirv_compiler_emit_shader_epilogue_function(compiler); - } - -- memset(®, 0, sizeof(reg)); -- reg.idx[1].offset = ~0u; -- - /* Fork and join phases share output registers (patch constants). - * Control point phase has separate output registers. 
*/ - memset(compiler->output_info, 0, signature->element_count * sizeof(*compiler->output_info)); - memset(compiler->private_output_variable, 0, sizeof(compiler->private_output_variable)); - memset(compiler->private_output_variable_array_idx, 0, sizeof(compiler->private_output_variable_array_idx)); - memset(compiler->private_output_variable_write_mask, 0, sizeof(compiler->private_output_variable_write_mask)); -- -- for (i = 0; i < signature->element_count; ++i) -- { -- const struct vkd3d_shader_signature_element *e = &signature->elements[i]; -- -- reg.type = VKD3DSPR_OUTPUT; -- reg.idx[0].offset = e->register_index; -- vkd3d_symbol_make_register(®_symbol, ®); -- if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) -- { -- rb_remove(&compiler->symbol_table, entry); -- -- symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); -- -- reg.type = VKD3DSPR_OUTCONTROLPOINT; -- reg.idx[1].offset = reg.idx[0].offset; -- reg.idx[0].offset = compiler->output_control_point_count; -- vkd3d_symbol_make_register(symbol, ®); -- symbol->info.reg.is_aggregate = false; -- -- if (rb_put(&compiler->symbol_table, symbol, entry) == -1) -- { -- ERR("Failed to insert vocp symbol entry (%s).\n", debug_vkd3d_symbol(symbol)); -- vkd3d_symbol_free(entry, NULL); -- } -- } -- } -- } -- -- if (phase->instance_count) -- { -- memset(®, 0, sizeof(reg)); -- reg.type = phase->type == VKD3DSIH_HS_FORK_PHASE ? 
VKD3DSPR_FORKINSTID : VKD3DSPR_JOININSTID; -- reg.idx[0].offset = ~0u; -- reg.idx[1].offset = ~0u; -- vkd3d_symbol_make_register(®_symbol, ®); -- if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) -- { -- rb_remove(&compiler->symbol_table, entry); -- vkd3d_symbol_free(entry, NULL); -- } - } - } - - static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) - { -- const struct vkd3d_shader_phase *previous_phase; -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ uint32_t function_id, void_id, function_type_id; - struct vkd3d_shader_phase *phase; - -- if ((previous_phase = spirv_compiler_get_current_shader_phase(compiler))) -- spirv_compiler_leave_shader_phase(compiler, previous_phase); -- -- if (!vkd3d_array_reserve((void **)&compiler->shader_phases, &compiler->shader_phases_size, -- compiler->shader_phase_count + 1, sizeof(*compiler->shader_phases))) -- return; -- phase = &compiler->shader_phases[compiler->shader_phase_count]; -- -- phase->type = instruction->handler_idx; -- phase->idx = compiler->shader_phase_count; -- phase->instance_count = 0; -- phase->function_id = 0; -- phase->instance_id = 0; -- phase->function_location = 0; -+ assert(compiler->phase != instruction->handler_idx); - -- ++compiler->shader_phase_count; --} -+ if (!is_in_default_phase(compiler)) -+ spirv_compiler_leave_shader_phase(compiler); - --static int spirv_compiler_emit_shader_phase_instance_count(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) --{ -- struct vkd3d_shader_phase *phase = &compiler->shader_phases[compiler->shader_phase_count - 1]; -+ function_id = vkd3d_spirv_alloc_id(builder); - -- if (!compiler->shader_phase_count -- || (phase->type != VKD3DSIH_HS_FORK_PHASE && phase->type != VKD3DSIH_HS_JOIN_PHASE) -- || phase->function_id) -- { -- WARN("Unexpected dcl_hs_{fork,join}_phase_instance_count instruction.\n"); -- return 
VKD3D_ERROR_INVALID_SHADER; -- } -- -- phase->instance_count = instruction->declaration.count; -- -- spirv_compiler_begin_shader_phase(compiler, phase); -- -- return VKD3D_OK; --} -+ void_id = vkd3d_spirv_get_op_type_void(builder); -+ function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, NULL, 0); -+ vkd3d_spirv_build_op_function(builder, void_id, function_id, -+ SpvFunctionControlMaskNone, function_type_id); - --static const struct vkd3d_shader_phase *spirv_compiler_get_control_point_phase( -- struct spirv_compiler *compiler) --{ -- const struct vkd3d_shader_phase *phase; -+ vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); - -- if (compiler->shader_phase_count < 1) -- return NULL; -+ compiler->phase = instruction->handler_idx; -+ spirv_compiler_emit_shader_phase_name(compiler, function_id, NULL); - -- phase = &compiler->shader_phases[0]; -- if (is_control_point_phase(phase)) -- return phase; -+ phase = (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) -+ ? 
&compiler->control_point_phase : &compiler->patch_constant_phase; -+ phase->function_id = function_id; -+ phase->function_location = vkd3d_spirv_stream_current_location(&builder->function_stream); - -- return NULL; -+ if (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) -+ compiler->emit_default_control_point_phase = instruction->flags; - } - - static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler) - { -- const struct vkd3d_shader_signature *output_signature = compiler->output_signature; -- const struct vkd3d_shader_signature *input_signature = compiler->input_signature; -+ const struct shader_signature *output_signature = &compiler->output_signature; -+ const struct shader_signature *input_signature = &compiler->input_signature; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - enum vkd3d_shader_component_type component_type; -- uint32_t input_id, output_id, dst_id, src_id; - struct vkd3d_shader_src_param invocation; - struct vkd3d_shader_register input_reg; - uint32_t type_id, output_ptr_type_id; -+ uint32_t input_id, output_id, dst_id; - unsigned int component_count; -+ unsigned int array_sizes[2]; - uint32_t invocation_id; - unsigned int i; - -@@ -6657,6 +6366,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile - invocation.reg.idx[0].offset = ~0u; - invocation.reg.idx[1].offset = ~0u; - invocation.reg.idx[2].offset = ~0u; -+ invocation.reg.idx_count = 0; - invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE; - - memset(&input_reg, 0, sizeof(input_reg)); -@@ -6664,37 +6374,42 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile - input_reg.data_type = VKD3D_DATA_FLOAT; - input_reg.idx[0].rel_addr = &invocation; - input_reg.idx[2].offset = ~0u; -+ input_reg.idx_count = 2; - input_id = spirv_compiler_get_register_id(compiler, &input_reg); - - assert(input_signature->element_count == output_signature->element_count); - for (i = 0; i < 
output_signature->element_count; ++i) - { -- const struct vkd3d_shader_signature_element *output = &output_signature->elements[i]; -- const struct vkd3d_shader_signature_element *input = &input_signature->elements[i]; -+ const struct signature_element *output = &output_signature->elements[i]; -+ const struct signature_element *input = &input_signature->elements[i]; - - assert(input->mask == output->mask); - assert(input->component_type == output->component_type); - -- input_reg.idx[1].offset = input->register_index; -+ input_reg.idx[1].offset = i; - input_id = spirv_compiler_get_register_id(compiler, &input_reg); -- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); -- src_id = vkd3d_spirv_build_op_load(builder, type_id, input_id, SpvMemoryAccessMaskNone); - - component_type = output->component_type; - component_count = vkd3d_write_mask_component_count(output->mask); -- output_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, -- SpvStorageClassOutput, component_type, component_count, compiler->output_control_point_count); -+ type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); -+ if ((array_sizes[0] = (input->register_count > 1) ? 
input->register_count : 0)) -+ type_id = vkd3d_spirv_get_op_type_array(builder, type_id, spirv_compiler_get_constant_uint(compiler, -+ array_sizes[0])); -+ -+ array_sizes[1] = compiler->output_control_point_count; -+ output_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, SpvStorageClassOutput, -+ component_type, component_count, array_sizes, 2); - vkd3d_spirv_add_iface_variable(builder, output_id); - vkd3d_spirv_build_op_decorate1(builder, output_id, SpvDecorationLocation, output->register_index); - vkd3d_spirv_build_op_name(builder, output_id, "vocp%u", output->register_index); - -- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); - dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_id, invocation_id); - -- spirv_compiler_emit_store(compiler, dst_id, output->mask, -- component_type, SpvStorageClassOutput, VKD3DSP_WRITEMASK_ALL, src_id); -+ vkd3d_spirv_build_op_copy_memory(builder, dst_id, input_id, SpvMemoryAccessMaskNone); - } -+ -+ vkd3d_spirv_build_op_return(builder); - } - - static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, -@@ -6723,95 +6438,6 @@ static void spirv_compiler_emit_hull_shader_barrier(struct spirv_compiler *compi - SpvScopeWorkgroup, SpvScopeInvocation, SpvMemorySemanticsMaskNone); - } - --static void spirv_compiler_emit_hull_shader_input_initialisation(struct spirv_compiler *compiler) --{ -- uint32_t type_id, length_id, register_index_id, src_array_id, dst_array_id, vicp_id, tmp_id; -- const struct vkd3d_shader_signature *signature = compiler->input_signature; -- uint32_t src_type_id, dst_type_id, src_id, dst_id, point_index_id; -- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_signature_element *element; -- enum vkd3d_shader_input_sysval_semantic sysval; -- const struct vkd3d_spirv_builtin *builtin; 
-- struct vkd3d_symbol *symbol, symbol_key; -- unsigned int register_count, i, j; -- struct vkd3d_shader_register r; -- struct rb_entry *entry; -- uint32_t indices[2]; -- -- for (i = 0, register_count = 0; i < signature->element_count; ++i) -- { -- register_count = max(register_count, signature->elements[i].register_index + 1); -- } -- -- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); -- length_id = spirv_compiler_get_constant_uint(compiler, compiler->input_control_point_count); -- type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); -- type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); -- -- memset(&r, 0, sizeof(r)); -- r.type = VKD3DSPR_INPUT; -- r.idx[0].offset = 0; -- r.idx[1].offset = ~0u; -- vkd3d_symbol_make_register(&symbol_key, &r); -- -- for (i = 0; i < signature->element_count; ++i) -- { -- element = &signature->elements[i]; -- -- symbol_key.key.reg.idx = element->register_index; -- entry = rb_get(&compiler->symbol_table, &symbol_key); -- symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); -- -- vicp_id = symbol->id; -- register_index_id = spirv_compiler_get_constant_uint(compiler, element->register_index); -- dst_array_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, type_id, vicp_id, register_index_id); -- -- if (element->sysval_semantic) -- { -- sysval = vkd3d_siv_from_sysval(element->sysval_semantic); -- builtin = get_spirv_builtin_for_sysval(compiler, sysval); -- src_array_id = spirv_compiler_emit_builtin_variable(compiler, builtin, -- SpvStorageClassInput, compiler->input_control_point_count); -- -- if (builtin->component_count == 4) -- { -- vkd3d_spirv_build_op_copy_memory(builder, dst_array_id, src_array_id, SpvMemoryAccessMaskNone); -- } -- else -- { -- tmp_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, builtin->component_count); -- src_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, tmp_id); -- 
dst_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, tmp_id); -- -- for (j = 0; j < compiler->input_control_point_count; ++j) -- { -- point_index_id = spirv_compiler_get_constant_uint(compiler, j); -- src_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, -- src_type_id, src_array_id, point_index_id); -- -- indices[0] = point_index_id; -- indices[1] = spirv_compiler_get_constant_uint(compiler, 0); -- dst_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, -- dst_type_id, dst_array_id, indices, 2); -- -- vkd3d_spirv_build_op_copy_memory(builder, dst_id, src_id, SpvMemoryAccessMaskNone); -- } -- } -- } -- else -- { -- src_array_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, -- SpvStorageClassInput, VKD3D_SHADER_COMPONENT_FLOAT, 4, compiler->input_control_point_count); -- vkd3d_spirv_add_iface_variable(builder, src_array_id); -- vkd3d_spirv_build_op_decorate1(builder, src_array_id, SpvDecorationLocation, element->register_index); -- vkd3d_spirv_build_op_name(builder, src_array_id, "v%u", element->register_index); -- -- vkd3d_spirv_build_op_copy_memory(builder, dst_array_id, src_array_id, SpvMemoryAccessMaskNone); -- } -- symbol->info.reg.dcl_mask |= element->mask; -- } --} -- - static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler *compiler) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -@@ -6854,46 +6480,21 @@ static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler - static void spirv_compiler_emit_hull_shader_main(struct spirv_compiler *compiler) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_phase *control_point_phase, *phase; -- uint32_t phase_instance_id; -- unsigned int i, j; - uint32_t void_id; - - vkd3d_spirv_builder_begin_main_function(builder); - -- spirv_compiler_emit_hull_shader_input_initialisation(compiler); -- - void_id = vkd3d_spirv_get_op_type_void(builder); 
- -- if ((control_point_phase = spirv_compiler_get_control_point_phase(compiler))) -- vkd3d_spirv_build_op_function_call(builder, void_id, control_point_phase->function_id, NULL, 0); -- else -- spirv_compiler_emit_default_control_point_phase(compiler); -+ vkd3d_spirv_build_op_function_call(builder, void_id, compiler->control_point_phase.function_id, NULL, 0); - - if (compiler->use_vocp) - spirv_compiler_emit_hull_shader_barrier(compiler); - -- for (i = 0; i < compiler->shader_phase_count; ++i) -- { -- phase = &compiler->shader_phases[i]; -- if (is_control_point_phase(phase)) -- continue; -- -- if (phase->instance_count) -- { -- for (j = 0; j < phase->instance_count; ++j) -- { -- phase_instance_id = spirv_compiler_get_constant_uint(compiler, j); -- vkd3d_spirv_build_op_function_call(builder, -- void_id, phase->function_id, &phase_instance_id, 1); -- } -- } -- else -- { -- vkd3d_spirv_build_op_function_call(builder, void_id, phase->function_id, NULL, 0); -- } -- } -- -+ /* TODO: only call the patch constant function for invocation 0. The simplest way -+ * is to avoid use of private variables there, otherwise we would need a separate -+ * patch constant epilogue also only called from invocation 0. 
*/ -+ vkd3d_spirv_build_op_function_call(builder, void_id, compiler->patch_constant_phase.function_id, NULL, 0); - spirv_compiler_emit_shader_epilogue_invocation(compiler); - vkd3d_spirv_build_op_return(builder); - vkd3d_spirv_build_op_function_end(builder); -@@ -7093,7 +6694,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, - uint32_t components[VKD3D_VEC4_SIZE]; - unsigned int i, component_count; - -- if (src->reg.type == VKD3DSPR_IMMCONST || src->reg.type == VKD3DSPR_IMMCONST64 || dst->modifiers || src->modifiers) -+ if (register_is_constant_or_undef(&src->reg) || dst->modifiers || src->modifiers) - goto general_implementation; - - spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); -@@ -7575,10 +7176,10 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co - static void spirv_compiler_emit_return(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) - { -- const struct vkd3d_shader_phase *phase = spirv_compiler_get_current_shader_phase(compiler); - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - -- if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (!phase || is_control_point_phase(phase))) -+ if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (is_in_default_phase(compiler) -+ || is_in_control_point_phase(compiler))) - spirv_compiler_emit_shader_epilogue_invocation(compiler); - - vkd3d_spirv_build_op_return(builder); -@@ -7851,7 +7452,13 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c - assert(compiler->control_flow_depth); - assert(cf_info->current_block == VKD3D_BLOCK_SWITCH); - -- assert(src->swizzle == VKD3D_SHADER_NO_SWIZZLE && src->reg.type == VKD3DSPR_IMMCONST); -+ if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) -+ { -+ WARN("Unexpected src swizzle %#x.\n", src->swizzle); -+ spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE, -+ "The swizzle for a switch 
case value is not scalar."); -+ } -+ assert(src->reg.type == VKD3DSPR_IMMCONST); - value = *src->reg.u.immconst_uint; - - if (!vkd3d_array_reserve((void **)&cf_info->u.switch_.case_blocks, &cf_info->u.switch_.case_blocks_size, -@@ -7972,12 +7579,15 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c - - if (cf_info) - cf_info->inside_block = false; -+ else -+ compiler->main_block_open = false; - break; - - case VKD3DSIH_RETP: - spirv_compiler_emit_retc(compiler, instruction); - break; - -+ case VKD3DSIH_DISCARD: - case VKD3DSIH_TEXKILL: - spirv_compiler_emit_kill(compiler, instruction); - break; -@@ -8256,7 +7866,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, - image_operands[image_operand_count++] = spirv_compiler_emit_texel_offset(compiler, - instruction, image.resource_type_info); - } -- if (multisample) -+ if (multisample && image.resource_type_info->ms) - { - operands_mask |= SpvImageOperandsSampleMask; - image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, -@@ -9521,58 +9131,6 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, - vkd3d_spirv_build_op_end_primitive(builder); - } - --static void spirv_compiler_emit_hull_shader_inputs(struct spirv_compiler *compiler) --{ -- const struct vkd3d_shader_signature *signature = compiler->input_signature; -- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- uint32_t type_id, length_id, vicp_id, vicp_type_id; -- unsigned int register_count, register_idx, i; -- struct vkd3d_shader_register r; -- struct vkd3d_symbol symbol; -- struct rb_entry *entry; -- -- for (i = 0, register_count = 0; i < signature->element_count; ++i) -- { -- register_count = max(register_count, signature->elements[i].register_index + 1); -- } -- -- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); -- length_id = spirv_compiler_get_constant_uint(compiler, compiler->input_control_point_count); -- 
type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); -- length_id = spirv_compiler_get_constant_uint(compiler, register_count); -- type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); -- vicp_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); -- -- vicp_id = vkd3d_spirv_build_op_variable(builder, -- &builder->global_stream, vicp_type_id, SpvStorageClassPrivate, 0); -- vkd3d_spirv_build_op_name(builder, vicp_id, "vicp"); -- -- memset(&r, 0, sizeof(r)); -- r.type = VKD3DSPR_INPUT; -- r.idx[0].offset = 0; -- r.idx[1].offset = ~0u; -- vkd3d_symbol_make_register(&symbol, &r); -- -- for (i = 0; i < signature->element_count; ++i) -- { -- register_idx = signature->elements[i].register_index; -- -- symbol.key.reg.idx = register_idx; -- if ((entry = rb_get(&compiler->symbol_table, &symbol))) -- { -- struct vkd3d_symbol *s = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); -- s->info.reg.dcl_mask |= signature->elements[i].mask; -- continue; -- } -- -- vkd3d_symbol_set_register_info(&symbol, vicp_id, SpvStorageClassPrivate, -- VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); -- symbol.info.reg.dcl_mask = signature->elements[i].mask; -- symbol.info.reg.is_aggregate = true; -- spirv_compiler_put_symbol(compiler, &symbol); -- } --} -- - /* This function is called after declarations are processed. 
*/ - static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) - { -@@ -9581,8 +9139,6 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) - if (compiler->xfb_info && compiler->xfb_info->element_count - && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) - spirv_compiler_emit_point_size(compiler); -- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL) -- spirv_compiler_emit_hull_shader_inputs(compiler); - } - - static bool is_dcl_instruction(enum vkd3d_shader_opcode handler_idx) -@@ -9607,33 +9163,12 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DCL_GLOBAL_FLAGS: - spirv_compiler_emit_dcl_global_flags(compiler, instruction); - break; -- case VKD3DSIH_DCL_TEMPS: -- spirv_compiler_emit_dcl_temps(compiler, instruction); -- break; - case VKD3DSIH_DCL_INDEXABLE_TEMP: - spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); - break; -- case VKD3DSIH_DCL_CONSTANT_BUFFER: -- spirv_compiler_emit_dcl_constant_buffer(compiler, instruction); -- break; - case VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER: - spirv_compiler_emit_dcl_immediate_constant_buffer(compiler, instruction); - break; -- case VKD3DSIH_DCL_SAMPLER: -- spirv_compiler_emit_dcl_sampler(compiler, instruction); -- break; -- case VKD3DSIH_DCL: -- case VKD3DSIH_DCL_UAV_TYPED: -- spirv_compiler_emit_dcl_resource(compiler, instruction); -- break; -- case VKD3DSIH_DCL_RESOURCE_RAW: -- case VKD3DSIH_DCL_UAV_RAW: -- spirv_compiler_emit_dcl_resource_raw(compiler, instruction); -- break; -- case VKD3DSIH_DCL_RESOURCE_STRUCTURED: -- case VKD3DSIH_DCL_UAV_STRUCTURED: -- spirv_compiler_emit_dcl_resource_structured(compiler, instruction); -- break; - case VKD3DSIH_DCL_TGSM_RAW: - spirv_compiler_emit_dcl_tgsm_raw(compiler, instruction); - break; -@@ -9660,9 +9195,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DCL_OUTPUT_SIV: - spirv_compiler_emit_dcl_output_siv(compiler, 
instruction); - break; -- case VKD3DSIH_DCL_INDEX_RANGE: -- spirv_compiler_emit_dcl_index_range(compiler, instruction); -- break; - case VKD3DSIH_DCL_STREAM: - spirv_compiler_emit_dcl_stream(compiler, instruction); - break; -@@ -9699,10 +9231,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DCL_THREAD_GROUP: - spirv_compiler_emit_dcl_thread_group(compiler, instruction); - break; -- case VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT: -- case VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: -- ret = spirv_compiler_emit_shader_phase_instance_count(compiler, instruction); -- break; - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -@@ -9826,6 +9354,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_CONTINUE: - case VKD3DSIH_CONTINUEP: - case VKD3DSIH_DEFAULT: -+ case VKD3DSIH_DISCARD: - case VKD3DSIH_ELSE: - case VKD3DSIH_ENDIF: - case VKD3DSIH_ENDLOOP: -@@ -9935,7 +9464,16 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_CUT_STREAM: - spirv_compiler_emit_cut_stream(compiler, instruction); - break; -+ case VKD3DSIH_DCL: -+ case VKD3DSIH_DCL_CONSTANT_BUFFER: - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: -+ case VKD3DSIH_DCL_RESOURCE_RAW: -+ case VKD3DSIH_DCL_RESOURCE_STRUCTURED: -+ case VKD3DSIH_DCL_SAMPLER: -+ case VKD3DSIH_DCL_TEMPS: -+ case VKD3DSIH_DCL_UAV_RAW: -+ case VKD3DSIH_DCL_UAV_STRUCTURED: -+ case VKD3DSIH_DCL_UAV_TYPED: - case VKD3DSIH_HS_DECLS: - case VKD3DSIH_NOP: - /* nothing to do */ -@@ -9947,28 +9485,102 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - return ret; - } - --int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, -+static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < compiler->scan_descriptor_info->descriptor_count; ++i) -+ { -+ const 
struct vkd3d_shader_descriptor_info1 *descriptor = &compiler->scan_descriptor_info->descriptors[i]; -+ struct vkd3d_shader_register_range range; -+ -+ range.first = descriptor->register_index; -+ if (descriptor->count == ~0u) -+ range.last = ~0u; -+ else -+ range.last = descriptor->register_index + descriptor->count - 1; -+ range.space = descriptor->register_space; -+ -+ switch (descriptor->type) -+ { -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: -+ spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor->register_id); -+ break; -+ -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: -+ spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor->register_id, descriptor->buffer_size); -+ break; -+ -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: -+ spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, -+ descriptor->sample_count, false, descriptor->resource_type, descriptor->resource_data_type, -+ descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); -+ break; -+ -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: -+ spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, -+ descriptor->sample_count, true, descriptor->resource_type, descriptor->resource_data_type, -+ descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ } -+} -+ -+static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, - struct vkd3d_shader_code *spirv) - { -- const struct vkd3d_shader_instruction_array *instructions = &parser->instructions; - const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; - const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_phase 
*phase; -+ struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; -+ struct vkd3d_shader_instruction_array instructions; - enum vkd3d_result result = VKD3D_OK; - unsigned int i; - -+ if (parser->shader_desc.temp_count) -+ spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); -+ -+ spirv_compiler_emit_descriptor_declarations(compiler); -+ - compiler->location.column = 0; -- for (i = 0; i < instructions->count; ++i) -+ compiler->location.line = 1; -+ -+ if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) -+ return result; -+ -+ instructions = parser->instructions; -+ memset(&parser->instructions, 0, sizeof(parser->instructions)); -+ -+ compiler->input_signature = shader_desc->input_signature; -+ compiler->output_signature = shader_desc->output_signature; -+ compiler->patch_constant_signature = shader_desc->patch_constant_signature; -+ memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); -+ memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); -+ memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); -+ -+ if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) -+ spirv_compiler_emit_shader_signature_outputs(compiler); -+ -+ for (i = 0; i < instructions.count && result >= 0; ++i) - { - compiler->location.line = i + 1; -- if ((result = spirv_compiler_handle_instruction(compiler, &instructions->elements[i])) < 0) -- return result; -+ result = spirv_compiler_handle_instruction(compiler, &instructions.elements[i]); - } - -- if ((phase = spirv_compiler_get_current_shader_phase(compiler))) -- spirv_compiler_leave_shader_phase(compiler, phase); -+ shader_instruction_array_destroy(&instructions); -+ -+ if (result < 0) -+ return result; -+ -+ if (compiler->main_block_open) -+ vkd3d_spirv_build_op_return(builder); -+ -+ if (!is_in_default_phase(compiler)) -+ spirv_compiler_leave_shader_phase(compiler); - else - 
vkd3d_spirv_build_op_function_end(builder); - -@@ -10023,23 +9635,23 @@ int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - return VKD3D_OK; - } - --void spirv_compiler_destroy(struct spirv_compiler *compiler) -+int spirv_compile(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, -+ const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { -- vkd3d_free(compiler->control_flow_info); -- -- vkd3d_free(compiler->output_info); -- -- vkd3d_free(compiler->push_constants); -- vkd3d_free(compiler->descriptor_offset_ids); -- -- vkd3d_spirv_builder_free(&compiler->spirv_builder); -- -- rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL); -+ struct spirv_compiler *spirv_compiler; -+ int ret; - -- vkd3d_free(compiler->shader_phases); -- vkd3d_free(compiler->spec_constants); -+ if (!(spirv_compiler = spirv_compiler_create(&parser->shader_version, &parser->shader_desc, -+ compile_info, scan_descriptor_info, message_context, &parser->location))) -+ { -+ ERR("Failed to create SPIR-V compiler.\n"); -+ return VKD3D_ERROR; -+ } - -- vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); -+ ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); - -- vkd3d_free(compiler); -+ spirv_compiler_destroy(spirv_compiler); -+ return ret; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -new file mode 100644 -index 00000000000..045fb6c5f64 ---- /dev/null -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -0,0 +1,5619 @@ -+/* -+ * TPF (Direct3D shader models 4 and 5 bytecode) support -+ * -+ * Copyright 2008-2009 Henri Verbeet for CodeWeavers -+ * Copyright 2010 Rico Schüller -+ * Copyright 2017 Józef Kucia for CodeWeavers -+ * Copyright 2019-2020 Zebediah Figura for CodeWeavers -+ * -+ * This library is free software; you can redistribute it and/or -+ * 
modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -+ */ -+ -+#include "hlsl.h" -+ -+#define SM4_MAX_SRC_COUNT 6 -+#define SM4_MAX_DST_COUNT 2 -+ -+STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); -+ -+#define VKD3D_SM4_PS 0x0000u -+#define VKD3D_SM4_VS 0x0001u -+#define VKD3D_SM4_GS 0x0002u -+#define VKD3D_SM5_HS 0x0003u -+#define VKD3D_SM5_DS 0x0004u -+#define VKD3D_SM5_CS 0x0005u -+#define VKD3D_SM4_LIB 0xfff0u -+ -+#define VKD3D_SM4_INSTRUCTION_MODIFIER (0x1u << 31) -+ -+#define VKD3D_SM4_MODIFIER_MASK 0x3fu -+ -+#define VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT 6 -+#define VKD3D_SM5_MODIFIER_DATA_TYPE_MASK (0xffffu << VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT) -+ -+#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT 6 -+#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT) -+ -+#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT 11 -+#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK (0xfffu << VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT) -+ -+#define VKD3D_SM4_AOFFIMMI_U_SHIFT 9 -+#define VKD3D_SM4_AOFFIMMI_U_MASK (0xfu << VKD3D_SM4_AOFFIMMI_U_SHIFT) -+#define VKD3D_SM4_AOFFIMMI_V_SHIFT 13 -+#define VKD3D_SM4_AOFFIMMI_V_MASK (0xfu << VKD3D_SM4_AOFFIMMI_V_SHIFT) -+#define VKD3D_SM4_AOFFIMMI_W_SHIFT 17 -+#define VKD3D_SM4_AOFFIMMI_W_MASK (0xfu << VKD3D_SM4_AOFFIMMI_W_SHIFT) -+ -+#define 
VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT 24 -+#define VKD3D_SM4_INSTRUCTION_LENGTH_MASK (0x1fu << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT) -+ -+#define VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT 11 -+#define VKD3D_SM4_INSTRUCTION_FLAGS_MASK (0x7u << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT) -+ -+#define VKD3D_SM4_RESOURCE_TYPE_SHIFT 11 -+#define VKD3D_SM4_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM4_RESOURCE_TYPE_SHIFT) -+ -+#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT 16 -+#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK (0xfu << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT) -+ -+#define VKD3D_SM4_PRIMITIVE_TYPE_SHIFT 11 -+#define VKD3D_SM4_PRIMITIVE_TYPE_MASK (0x3fu << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT) -+ -+#define VKD3D_SM4_INDEX_TYPE_SHIFT 11 -+#define VKD3D_SM4_INDEX_TYPE_MASK (0x1u << VKD3D_SM4_INDEX_TYPE_SHIFT) -+ -+#define VKD3D_SM4_SAMPLER_MODE_SHIFT 11 -+#define VKD3D_SM4_SAMPLER_MODE_MASK (0xfu << VKD3D_SM4_SAMPLER_MODE_SHIFT) -+ -+#define VKD3D_SM4_SHADER_DATA_TYPE_SHIFT 11 -+#define VKD3D_SM4_SHADER_DATA_TYPE_MASK (0xfu << VKD3D_SM4_SHADER_DATA_TYPE_SHIFT) -+ -+#define VKD3D_SM4_INTERPOLATION_MODE_SHIFT 11 -+#define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << VKD3D_SM4_INTERPOLATION_MODE_SHIFT) -+ -+#define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 -+#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) -+ -+#define VKD3D_SM5_PRECISE_SHIFT 19 -+#define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) -+ -+#define VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT 11 -+#define VKD3D_SM5_CONTROL_POINT_COUNT_MASK (0xffu << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT) -+ -+#define VKD3D_SM5_FP_ARRAY_SIZE_SHIFT 16 -+#define VKD3D_SM5_FP_TABLE_COUNT_MASK 0xffffu -+ -+#define VKD3D_SM5_UAV_FLAGS_SHIFT 15 -+#define VKD3D_SM5_UAV_FLAGS_MASK (0x1ffu << VKD3D_SM5_UAV_FLAGS_SHIFT) -+ -+#define VKD3D_SM5_SYNC_FLAGS_SHIFT 11 -+#define VKD3D_SM5_SYNC_FLAGS_MASK (0xffu << VKD3D_SM5_SYNC_FLAGS_SHIFT) -+ -+#define VKD3D_SM5_TESSELLATOR_SHIFT 11 -+#define VKD3D_SM5_TESSELLATOR_MASK (0xfu << VKD3D_SM5_TESSELLATOR_SHIFT) 
-+ -+#define VKD3D_SM4_OPCODE_MASK 0xff -+ -+#define VKD3D_SM4_EXTENDED_OPERAND (0x1u << 31) -+ -+#define VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK 0x3fu -+ -+#define VKD3D_SM4_REGISTER_MODIFIER_SHIFT 6 -+#define VKD3D_SM4_REGISTER_MODIFIER_MASK (0xffu << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) -+ -+#define VKD3D_SM4_REGISTER_PRECISION_SHIFT 14 -+#define VKD3D_SM4_REGISTER_PRECISION_MASK (0x7u << VKD3D_SM4_REGISTER_PRECISION_SHIFT) -+ -+#define VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT 17 -+#define VKD3D_SM4_REGISTER_NON_UNIFORM_MASK (0x1u << VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT) -+ -+#define VKD3D_SM4_ADDRESSING_SHIFT2 28 -+#define VKD3D_SM4_ADDRESSING_MASK2 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT2) -+ -+#define VKD3D_SM4_ADDRESSING_SHIFT1 25 -+#define VKD3D_SM4_ADDRESSING_MASK1 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT1) -+ -+#define VKD3D_SM4_ADDRESSING_SHIFT0 22 -+#define VKD3D_SM4_ADDRESSING_MASK0 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT0) -+ -+#define VKD3D_SM4_REGISTER_ORDER_SHIFT 20 -+#define VKD3D_SM4_REGISTER_ORDER_MASK (0x3u << VKD3D_SM4_REGISTER_ORDER_SHIFT) -+ -+#define VKD3D_SM4_REGISTER_TYPE_SHIFT 12 -+#define VKD3D_SM4_REGISTER_TYPE_MASK (0xffu << VKD3D_SM4_REGISTER_TYPE_SHIFT) -+ -+#define VKD3D_SM4_SWIZZLE_TYPE_SHIFT 2 -+#define VKD3D_SM4_SWIZZLE_TYPE_MASK (0x3u << VKD3D_SM4_SWIZZLE_TYPE_SHIFT) -+ -+#define VKD3D_SM4_DIMENSION_SHIFT 0 -+#define VKD3D_SM4_DIMENSION_MASK (0x3u << VKD3D_SM4_DIMENSION_SHIFT) -+ -+#define VKD3D_SM4_WRITEMASK_SHIFT 4 -+#define VKD3D_SM4_WRITEMASK_MASK (0xfu << VKD3D_SM4_WRITEMASK_SHIFT) -+ -+#define VKD3D_SM4_SWIZZLE_SHIFT 4 -+#define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT) -+ -+#define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) -+#define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf) -+ -+#define VKD3D_SM4_ADDRESSING_RELATIVE 0x2 -+#define VKD3D_SM4_ADDRESSING_OFFSET 0x1 -+ -+#define VKD3D_SM4_INSTRUCTION_FLAG_SATURATE 0x4 -+ -+#define VKD3D_SM4_CONDITIONAL_NZ (0x1u << 18) -+ -+#define 
VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) -+ -+/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ -+#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 -+ -+enum vkd3d_sm4_opcode -+{ -+ VKD3D_SM4_OP_ADD = 0x00, -+ VKD3D_SM4_OP_AND = 0x01, -+ VKD3D_SM4_OP_BREAK = 0x02, -+ VKD3D_SM4_OP_BREAKC = 0x03, -+ VKD3D_SM4_OP_CASE = 0x06, -+ VKD3D_SM4_OP_CONTINUE = 0x07, -+ VKD3D_SM4_OP_CONTINUEC = 0x08, -+ VKD3D_SM4_OP_CUT = 0x09, -+ VKD3D_SM4_OP_DEFAULT = 0x0a, -+ VKD3D_SM4_OP_DERIV_RTX = 0x0b, -+ VKD3D_SM4_OP_DERIV_RTY = 0x0c, -+ VKD3D_SM4_OP_DISCARD = 0x0d, -+ VKD3D_SM4_OP_DIV = 0x0e, -+ VKD3D_SM4_OP_DP2 = 0x0f, -+ VKD3D_SM4_OP_DP3 = 0x10, -+ VKD3D_SM4_OP_DP4 = 0x11, -+ VKD3D_SM4_OP_ELSE = 0x12, -+ VKD3D_SM4_OP_EMIT = 0x13, -+ VKD3D_SM4_OP_ENDIF = 0x15, -+ VKD3D_SM4_OP_ENDLOOP = 0x16, -+ VKD3D_SM4_OP_ENDSWITCH = 0x17, -+ VKD3D_SM4_OP_EQ = 0x18, -+ VKD3D_SM4_OP_EXP = 0x19, -+ VKD3D_SM4_OP_FRC = 0x1a, -+ VKD3D_SM4_OP_FTOI = 0x1b, -+ VKD3D_SM4_OP_FTOU = 0x1c, -+ VKD3D_SM4_OP_GE = 0x1d, -+ VKD3D_SM4_OP_IADD = 0x1e, -+ VKD3D_SM4_OP_IF = 0x1f, -+ VKD3D_SM4_OP_IEQ = 0x20, -+ VKD3D_SM4_OP_IGE = 0x21, -+ VKD3D_SM4_OP_ILT = 0x22, -+ VKD3D_SM4_OP_IMAD = 0x23, -+ VKD3D_SM4_OP_IMAX = 0x24, -+ VKD3D_SM4_OP_IMIN = 0x25, -+ VKD3D_SM4_OP_IMUL = 0x26, -+ VKD3D_SM4_OP_INE = 0x27, -+ VKD3D_SM4_OP_INEG = 0x28, -+ VKD3D_SM4_OP_ISHL = 0x29, -+ VKD3D_SM4_OP_ISHR = 0x2a, -+ VKD3D_SM4_OP_ITOF = 0x2b, -+ VKD3D_SM4_OP_LABEL = 0x2c, -+ VKD3D_SM4_OP_LD = 0x2d, -+ VKD3D_SM4_OP_LD2DMS = 0x2e, -+ VKD3D_SM4_OP_LOG = 0x2f, -+ VKD3D_SM4_OP_LOOP = 0x30, -+ VKD3D_SM4_OP_LT = 0x31, -+ VKD3D_SM4_OP_MAD = 0x32, -+ VKD3D_SM4_OP_MIN = 0x33, -+ VKD3D_SM4_OP_MAX = 0x34, -+ VKD3D_SM4_OP_SHADER_DATA = 0x35, -+ VKD3D_SM4_OP_MOV = 0x36, -+ VKD3D_SM4_OP_MOVC = 0x37, -+ VKD3D_SM4_OP_MUL = 0x38, -+ VKD3D_SM4_OP_NE = 0x39, -+ VKD3D_SM4_OP_NOP = 0x3a, -+ VKD3D_SM4_OP_NOT = 0x3b, -+ VKD3D_SM4_OP_OR = 0x3c, -+ VKD3D_SM4_OP_RESINFO = 0x3d, -+ VKD3D_SM4_OP_RET = 0x3e, -+ VKD3D_SM4_OP_RETC = 
0x3f, -+ VKD3D_SM4_OP_ROUND_NE = 0x40, -+ VKD3D_SM4_OP_ROUND_NI = 0x41, -+ VKD3D_SM4_OP_ROUND_PI = 0x42, -+ VKD3D_SM4_OP_ROUND_Z = 0x43, -+ VKD3D_SM4_OP_RSQ = 0x44, -+ VKD3D_SM4_OP_SAMPLE = 0x45, -+ VKD3D_SM4_OP_SAMPLE_C = 0x46, -+ VKD3D_SM4_OP_SAMPLE_C_LZ = 0x47, -+ VKD3D_SM4_OP_SAMPLE_LOD = 0x48, -+ VKD3D_SM4_OP_SAMPLE_GRAD = 0x49, -+ VKD3D_SM4_OP_SAMPLE_B = 0x4a, -+ VKD3D_SM4_OP_SQRT = 0x4b, -+ VKD3D_SM4_OP_SWITCH = 0x4c, -+ VKD3D_SM4_OP_SINCOS = 0x4d, -+ VKD3D_SM4_OP_UDIV = 0x4e, -+ VKD3D_SM4_OP_ULT = 0x4f, -+ VKD3D_SM4_OP_UGE = 0x50, -+ VKD3D_SM4_OP_UMUL = 0x51, -+ VKD3D_SM4_OP_UMAX = 0x53, -+ VKD3D_SM4_OP_UMIN = 0x54, -+ VKD3D_SM4_OP_USHR = 0x55, -+ VKD3D_SM4_OP_UTOF = 0x56, -+ VKD3D_SM4_OP_XOR = 0x57, -+ VKD3D_SM4_OP_DCL_RESOURCE = 0x58, -+ VKD3D_SM4_OP_DCL_CONSTANT_BUFFER = 0x59, -+ VKD3D_SM4_OP_DCL_SAMPLER = 0x5a, -+ VKD3D_SM4_OP_DCL_INDEX_RANGE = 0x5b, -+ VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY = 0x5c, -+ VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE = 0x5d, -+ VKD3D_SM4_OP_DCL_VERTICES_OUT = 0x5e, -+ VKD3D_SM4_OP_DCL_INPUT = 0x5f, -+ VKD3D_SM4_OP_DCL_INPUT_SGV = 0x60, -+ VKD3D_SM4_OP_DCL_INPUT_SIV = 0x61, -+ VKD3D_SM4_OP_DCL_INPUT_PS = 0x62, -+ VKD3D_SM4_OP_DCL_INPUT_PS_SGV = 0x63, -+ VKD3D_SM4_OP_DCL_INPUT_PS_SIV = 0x64, -+ VKD3D_SM4_OP_DCL_OUTPUT = 0x65, -+ VKD3D_SM4_OP_DCL_OUTPUT_SIV = 0x67, -+ VKD3D_SM4_OP_DCL_TEMPS = 0x68, -+ VKD3D_SM4_OP_DCL_INDEXABLE_TEMP = 0x69, -+ VKD3D_SM4_OP_DCL_GLOBAL_FLAGS = 0x6a, -+ VKD3D_SM4_OP_LOD = 0x6c, -+ VKD3D_SM4_OP_GATHER4 = 0x6d, -+ VKD3D_SM4_OP_SAMPLE_POS = 0x6e, -+ VKD3D_SM4_OP_SAMPLE_INFO = 0x6f, -+ VKD3D_SM5_OP_HS_DECLS = 0x71, -+ VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE = 0x72, -+ VKD3D_SM5_OP_HS_FORK_PHASE = 0x73, -+ VKD3D_SM5_OP_HS_JOIN_PHASE = 0x74, -+ VKD3D_SM5_OP_EMIT_STREAM = 0x75, -+ VKD3D_SM5_OP_CUT_STREAM = 0x76, -+ VKD3D_SM5_OP_FCALL = 0x78, -+ VKD3D_SM5_OP_BUFINFO = 0x79, -+ VKD3D_SM5_OP_DERIV_RTX_COARSE = 0x7a, -+ VKD3D_SM5_OP_DERIV_RTX_FINE = 0x7b, -+ VKD3D_SM5_OP_DERIV_RTY_COARSE = 0x7c, -+ VKD3D_SM5_OP_DERIV_RTY_FINE = 
0x7d, -+ VKD3D_SM5_OP_GATHER4_C = 0x7e, -+ VKD3D_SM5_OP_GATHER4_PO = 0x7f, -+ VKD3D_SM5_OP_GATHER4_PO_C = 0x80, -+ VKD3D_SM5_OP_RCP = 0x81, -+ VKD3D_SM5_OP_F32TOF16 = 0x82, -+ VKD3D_SM5_OP_F16TOF32 = 0x83, -+ VKD3D_SM5_OP_COUNTBITS = 0x86, -+ VKD3D_SM5_OP_FIRSTBIT_HI = 0x87, -+ VKD3D_SM5_OP_FIRSTBIT_LO = 0x88, -+ VKD3D_SM5_OP_FIRSTBIT_SHI = 0x89, -+ VKD3D_SM5_OP_UBFE = 0x8a, -+ VKD3D_SM5_OP_IBFE = 0x8b, -+ VKD3D_SM5_OP_BFI = 0x8c, -+ VKD3D_SM5_OP_BFREV = 0x8d, -+ VKD3D_SM5_OP_SWAPC = 0x8e, -+ VKD3D_SM5_OP_DCL_STREAM = 0x8f, -+ VKD3D_SM5_OP_DCL_FUNCTION_BODY = 0x90, -+ VKD3D_SM5_OP_DCL_FUNCTION_TABLE = 0x91, -+ VKD3D_SM5_OP_DCL_INTERFACE = 0x92, -+ VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT = 0x93, -+ VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT = 0x94, -+ VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN = 0x95, -+ VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING = 0x96, -+ VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE = 0x97, -+ VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR = 0x98, -+ VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT = 0x99, -+ VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT = 0x9a, -+ VKD3D_SM5_OP_DCL_THREAD_GROUP = 0x9b, -+ VKD3D_SM5_OP_DCL_UAV_TYPED = 0x9c, -+ VKD3D_SM5_OP_DCL_UAV_RAW = 0x9d, -+ VKD3D_SM5_OP_DCL_UAV_STRUCTURED = 0x9e, -+ VKD3D_SM5_OP_DCL_TGSM_RAW = 0x9f, -+ VKD3D_SM5_OP_DCL_TGSM_STRUCTURED = 0xa0, -+ VKD3D_SM5_OP_DCL_RESOURCE_RAW = 0xa1, -+ VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED = 0xa2, -+ VKD3D_SM5_OP_LD_UAV_TYPED = 0xa3, -+ VKD3D_SM5_OP_STORE_UAV_TYPED = 0xa4, -+ VKD3D_SM5_OP_LD_RAW = 0xa5, -+ VKD3D_SM5_OP_STORE_RAW = 0xa6, -+ VKD3D_SM5_OP_LD_STRUCTURED = 0xa7, -+ VKD3D_SM5_OP_STORE_STRUCTURED = 0xa8, -+ VKD3D_SM5_OP_ATOMIC_AND = 0xa9, -+ VKD3D_SM5_OP_ATOMIC_OR = 0xaa, -+ VKD3D_SM5_OP_ATOMIC_XOR = 0xab, -+ VKD3D_SM5_OP_ATOMIC_CMP_STORE = 0xac, -+ VKD3D_SM5_OP_ATOMIC_IADD = 0xad, -+ VKD3D_SM5_OP_ATOMIC_IMAX = 0xae, -+ VKD3D_SM5_OP_ATOMIC_IMIN = 0xaf, -+ VKD3D_SM5_OP_ATOMIC_UMAX = 0xb0, -+ VKD3D_SM5_OP_ATOMIC_UMIN = 0xb1, -+ VKD3D_SM5_OP_IMM_ATOMIC_ALLOC = 0xb2, -+ 
VKD3D_SM5_OP_IMM_ATOMIC_CONSUME = 0xb3, -+ VKD3D_SM5_OP_IMM_ATOMIC_IADD = 0xb4, -+ VKD3D_SM5_OP_IMM_ATOMIC_AND = 0xb5, -+ VKD3D_SM5_OP_IMM_ATOMIC_OR = 0xb6, -+ VKD3D_SM5_OP_IMM_ATOMIC_XOR = 0xb7, -+ VKD3D_SM5_OP_IMM_ATOMIC_EXCH = 0xb8, -+ VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH = 0xb9, -+ VKD3D_SM5_OP_IMM_ATOMIC_IMAX = 0xba, -+ VKD3D_SM5_OP_IMM_ATOMIC_IMIN = 0xbb, -+ VKD3D_SM5_OP_IMM_ATOMIC_UMAX = 0xbc, -+ VKD3D_SM5_OP_IMM_ATOMIC_UMIN = 0xbd, -+ VKD3D_SM5_OP_SYNC = 0xbe, -+ VKD3D_SM5_OP_DADD = 0xbf, -+ VKD3D_SM5_OP_DMAX = 0xc0, -+ VKD3D_SM5_OP_DMIN = 0xc1, -+ VKD3D_SM5_OP_DMUL = 0xc2, -+ VKD3D_SM5_OP_DEQ = 0xc3, -+ VKD3D_SM5_OP_DGE = 0xc4, -+ VKD3D_SM5_OP_DLT = 0xc5, -+ VKD3D_SM5_OP_DNE = 0xc6, -+ VKD3D_SM5_OP_DMOV = 0xc7, -+ VKD3D_SM5_OP_DMOVC = 0xc8, -+ VKD3D_SM5_OP_DTOF = 0xc9, -+ VKD3D_SM5_OP_FTOD = 0xca, -+ VKD3D_SM5_OP_EVAL_SAMPLE_INDEX = 0xcc, -+ VKD3D_SM5_OP_EVAL_CENTROID = 0xcd, -+ VKD3D_SM5_OP_DCL_GS_INSTANCES = 0xce, -+ VKD3D_SM5_OP_DDIV = 0xd2, -+ VKD3D_SM5_OP_DFMA = 0xd3, -+ VKD3D_SM5_OP_DRCP = 0xd4, -+ VKD3D_SM5_OP_MSAD = 0xd5, -+ VKD3D_SM5_OP_DTOI = 0xd6, -+ VKD3D_SM5_OP_DTOU = 0xd7, -+ VKD3D_SM5_OP_ITOD = 0xd8, -+ VKD3D_SM5_OP_UTOD = 0xd9, -+ VKD3D_SM5_OP_GATHER4_S = 0xdb, -+ VKD3D_SM5_OP_GATHER4_C_S = 0xdc, -+ VKD3D_SM5_OP_GATHER4_PO_S = 0xdd, -+ VKD3D_SM5_OP_GATHER4_PO_C_S = 0xde, -+ VKD3D_SM5_OP_LD_S = 0xdf, -+ VKD3D_SM5_OP_LD2DMS_S = 0xe0, -+ VKD3D_SM5_OP_LD_UAV_TYPED_S = 0xe1, -+ VKD3D_SM5_OP_LD_RAW_S = 0xe2, -+ VKD3D_SM5_OP_LD_STRUCTURED_S = 0xe3, -+ VKD3D_SM5_OP_SAMPLE_LOD_S = 0xe4, -+ VKD3D_SM5_OP_SAMPLE_C_LZ_S = 0xe5, -+ VKD3D_SM5_OP_SAMPLE_CL_S = 0xe6, -+ VKD3D_SM5_OP_SAMPLE_B_CL_S = 0xe7, -+ VKD3D_SM5_OP_SAMPLE_GRAD_CL_S = 0xe8, -+ VKD3D_SM5_OP_SAMPLE_C_CL_S = 0xe9, -+ VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED = 0xea, -+}; -+ -+enum vkd3d_sm4_instruction_modifier -+{ -+ VKD3D_SM4_MODIFIER_AOFFIMMI = 0x1, -+ VKD3D_SM5_MODIFIER_RESOURCE_TYPE = 0x2, -+ VKD3D_SM5_MODIFIER_DATA_TYPE = 0x3, -+}; -+ -+enum vkd3d_sm4_register_type -+{ -+ VKD3D_SM4_RT_TEMP 
= 0x00, -+ VKD3D_SM4_RT_INPUT = 0x01, -+ VKD3D_SM4_RT_OUTPUT = 0x02, -+ VKD3D_SM4_RT_INDEXABLE_TEMP = 0x03, -+ VKD3D_SM4_RT_IMMCONST = 0x04, -+ VKD3D_SM4_RT_IMMCONST64 = 0x05, -+ VKD3D_SM4_RT_SAMPLER = 0x06, -+ VKD3D_SM4_RT_RESOURCE = 0x07, -+ VKD3D_SM4_RT_CONSTBUFFER = 0x08, -+ VKD3D_SM4_RT_IMMCONSTBUFFER = 0x09, -+ VKD3D_SM4_RT_PRIMID = 0x0b, -+ VKD3D_SM4_RT_DEPTHOUT = 0x0c, -+ VKD3D_SM4_RT_NULL = 0x0d, -+ VKD3D_SM4_RT_RASTERIZER = 0x0e, -+ VKD3D_SM4_RT_OMASK = 0x0f, -+ VKD3D_SM5_RT_STREAM = 0x10, -+ VKD3D_SM5_RT_FUNCTION_BODY = 0x11, -+ VKD3D_SM5_RT_FUNCTION_POINTER = 0x13, -+ VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID = 0x16, -+ VKD3D_SM5_RT_FORK_INSTANCE_ID = 0x17, -+ VKD3D_SM5_RT_JOIN_INSTANCE_ID = 0x18, -+ VKD3D_SM5_RT_INPUT_CONTROL_POINT = 0x19, -+ VKD3D_SM5_RT_OUTPUT_CONTROL_POINT = 0x1a, -+ VKD3D_SM5_RT_PATCH_CONSTANT_DATA = 0x1b, -+ VKD3D_SM5_RT_DOMAIN_LOCATION = 0x1c, -+ VKD3D_SM5_RT_UAV = 0x1e, -+ VKD3D_SM5_RT_SHARED_MEMORY = 0x1f, -+ VKD3D_SM5_RT_THREAD_ID = 0x20, -+ VKD3D_SM5_RT_THREAD_GROUP_ID = 0x21, -+ VKD3D_SM5_RT_LOCAL_THREAD_ID = 0x22, -+ VKD3D_SM5_RT_COVERAGE = 0x23, -+ VKD3D_SM5_RT_LOCAL_THREAD_INDEX = 0x24, -+ VKD3D_SM5_RT_GS_INSTANCE_ID = 0x25, -+ VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, -+ VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, -+ VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, -+ -+ VKD3D_SM4_REGISTER_TYPE_COUNT, -+}; -+ -+enum vkd3d_sm4_extended_operand_type -+{ -+ VKD3D_SM4_EXTENDED_OPERAND_NONE = 0x0, -+ VKD3D_SM4_EXTENDED_OPERAND_MODIFIER = 0x1, -+}; -+ -+enum vkd3d_sm4_register_modifier -+{ -+ VKD3D_SM4_REGISTER_MODIFIER_NONE = 0x00, -+ VKD3D_SM4_REGISTER_MODIFIER_NEGATE = 0x01, -+ VKD3D_SM4_REGISTER_MODIFIER_ABS = 0x02, -+ VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE = 0x03, -+}; -+ -+enum vkd3d_sm4_register_precision -+{ -+ VKD3D_SM4_REGISTER_PRECISION_DEFAULT = 0x0, -+ VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 = 0x1, -+ VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 = 0x2, -+ VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 = 0x4, -+ 
VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 = 0x5, -+}; -+ -+enum vkd3d_sm4_output_primitive_type -+{ -+ VKD3D_SM4_OUTPUT_PT_POINTLIST = 0x1, -+ VKD3D_SM4_OUTPUT_PT_LINESTRIP = 0x3, -+ VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP = 0x5, -+}; -+ -+enum vkd3d_sm4_input_primitive_type -+{ -+ VKD3D_SM4_INPUT_PT_POINT = 0x01, -+ VKD3D_SM4_INPUT_PT_LINE = 0x02, -+ VKD3D_SM4_INPUT_PT_TRIANGLE = 0x03, -+ VKD3D_SM4_INPUT_PT_LINEADJ = 0x06, -+ VKD3D_SM4_INPUT_PT_TRIANGLEADJ = 0x07, -+ VKD3D_SM5_INPUT_PT_PATCH1 = 0x08, -+ VKD3D_SM5_INPUT_PT_PATCH2 = 0x09, -+ VKD3D_SM5_INPUT_PT_PATCH3 = 0x0a, -+ VKD3D_SM5_INPUT_PT_PATCH4 = 0x0b, -+ VKD3D_SM5_INPUT_PT_PATCH5 = 0x0c, -+ VKD3D_SM5_INPUT_PT_PATCH6 = 0x0d, -+ VKD3D_SM5_INPUT_PT_PATCH7 = 0x0e, -+ VKD3D_SM5_INPUT_PT_PATCH8 = 0x0f, -+ VKD3D_SM5_INPUT_PT_PATCH9 = 0x10, -+ VKD3D_SM5_INPUT_PT_PATCH10 = 0x11, -+ VKD3D_SM5_INPUT_PT_PATCH11 = 0x12, -+ VKD3D_SM5_INPUT_PT_PATCH12 = 0x13, -+ VKD3D_SM5_INPUT_PT_PATCH13 = 0x14, -+ VKD3D_SM5_INPUT_PT_PATCH14 = 0x15, -+ VKD3D_SM5_INPUT_PT_PATCH15 = 0x16, -+ VKD3D_SM5_INPUT_PT_PATCH16 = 0x17, -+ VKD3D_SM5_INPUT_PT_PATCH17 = 0x18, -+ VKD3D_SM5_INPUT_PT_PATCH18 = 0x19, -+ VKD3D_SM5_INPUT_PT_PATCH19 = 0x1a, -+ VKD3D_SM5_INPUT_PT_PATCH20 = 0x1b, -+ VKD3D_SM5_INPUT_PT_PATCH21 = 0x1c, -+ VKD3D_SM5_INPUT_PT_PATCH22 = 0x1d, -+ VKD3D_SM5_INPUT_PT_PATCH23 = 0x1e, -+ VKD3D_SM5_INPUT_PT_PATCH24 = 0x1f, -+ VKD3D_SM5_INPUT_PT_PATCH25 = 0x20, -+ VKD3D_SM5_INPUT_PT_PATCH26 = 0x21, -+ VKD3D_SM5_INPUT_PT_PATCH27 = 0x22, -+ VKD3D_SM5_INPUT_PT_PATCH28 = 0x23, -+ VKD3D_SM5_INPUT_PT_PATCH29 = 0x24, -+ VKD3D_SM5_INPUT_PT_PATCH30 = 0x25, -+ VKD3D_SM5_INPUT_PT_PATCH31 = 0x26, -+ VKD3D_SM5_INPUT_PT_PATCH32 = 0x27, -+}; -+ -+enum vkd3d_sm4_swizzle_type -+{ -+ VKD3D_SM4_SWIZZLE_NONE = 0x0, /* swizzle bitfield contains a mask */ -+ VKD3D_SM4_SWIZZLE_VEC4 = 0x1, -+ VKD3D_SM4_SWIZZLE_SCALAR = 0x2, -+}; -+ -+enum vkd3d_sm4_dimension -+{ -+ VKD3D_SM4_DIMENSION_NONE = 0x0, -+ VKD3D_SM4_DIMENSION_SCALAR = 0x1, -+ VKD3D_SM4_DIMENSION_VEC4 = 0x2, 
-+}; -+ -+enum vkd3d_sm4_resource_type -+{ -+ VKD3D_SM4_RESOURCE_BUFFER = 0x1, -+ VKD3D_SM4_RESOURCE_TEXTURE_1D = 0x2, -+ VKD3D_SM4_RESOURCE_TEXTURE_2D = 0x3, -+ VKD3D_SM4_RESOURCE_TEXTURE_2DMS = 0x4, -+ VKD3D_SM4_RESOURCE_TEXTURE_3D = 0x5, -+ VKD3D_SM4_RESOURCE_TEXTURE_CUBE = 0x6, -+ VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY = 0x7, -+ VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY = 0x8, -+ VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY = 0x9, -+ VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY = 0xa, -+ VKD3D_SM4_RESOURCE_RAW_BUFFER = 0xb, -+ VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER = 0xc, -+}; -+ -+enum vkd3d_sm4_data_type -+{ -+ VKD3D_SM4_DATA_UNORM = 0x1, -+ VKD3D_SM4_DATA_SNORM = 0x2, -+ VKD3D_SM4_DATA_INT = 0x3, -+ VKD3D_SM4_DATA_UINT = 0x4, -+ VKD3D_SM4_DATA_FLOAT = 0x5, -+ VKD3D_SM4_DATA_MIXED = 0x6, -+ VKD3D_SM4_DATA_DOUBLE = 0x7, -+ VKD3D_SM4_DATA_CONTINUED = 0x8, -+ VKD3D_SM4_DATA_UNUSED = 0x9, -+}; -+ -+enum vkd3d_sm4_sampler_mode -+{ -+ VKD3D_SM4_SAMPLER_DEFAULT = 0x0, -+ VKD3D_SM4_SAMPLER_COMPARISON = 0x1, -+}; -+ -+enum vkd3d_sm4_shader_data_type -+{ -+ VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER = 0x3, -+ VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, -+}; -+ -+struct sm4_index_range -+{ -+ unsigned int index; -+ unsigned int count; -+ unsigned int mask; -+}; -+ -+struct sm4_index_range_array -+{ -+ unsigned int count; -+ struct sm4_index_range ranges[MAX_REG_OUTPUT * 2]; -+}; -+ -+struct vkd3d_sm4_lookup_tables -+{ -+ const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; -+ const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; -+}; -+ -+struct vkd3d_shader_sm4_parser -+{ -+ const uint32_t *start, *end, *ptr; -+ -+ unsigned int output_map[MAX_REG_OUTPUT]; -+ -+ enum vkd3d_shader_opcode phase; -+ bool has_control_point_phase; -+ unsigned int input_register_masks[MAX_REG_OUTPUT]; -+ unsigned int output_register_masks[MAX_REG_OUTPUT]; -+ unsigned int patch_constant_register_masks[MAX_REG_OUTPUT]; -+ -+ struct 
sm4_index_range_array input_index_ranges; -+ struct sm4_index_range_array output_index_ranges; -+ struct sm4_index_range_array patch_constant_index_ranges; -+ -+ struct vkd3d_sm4_lookup_tables lookup; -+ -+ struct vkd3d_shader_parser p; -+}; -+ -+struct vkd3d_sm4_opcode_info -+{ -+ enum vkd3d_sm4_opcode opcode; -+ enum vkd3d_shader_opcode handler_idx; -+ char dst_info[SM4_MAX_DST_COUNT]; -+ char src_info[SM4_MAX_SRC_COUNT]; -+ void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -+ const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); -+}; -+ -+static const enum vkd3d_primitive_type output_primitive_type_table[] = -+{ -+ /* UNKNOWN */ VKD3D_PT_UNDEFINED, -+ /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, -+ /* UNKNOWN */ VKD3D_PT_UNDEFINED, -+ /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, -+ /* UNKNOWN */ VKD3D_PT_UNDEFINED, -+ /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ VKD3D_PT_TRIANGLESTRIP, -+}; -+ -+static const enum vkd3d_primitive_type input_primitive_type_table[] = -+{ -+ /* UNKNOWN */ VKD3D_PT_UNDEFINED, -+ /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, -+ /* VKD3D_SM4_INPUT_PT_LINE */ VKD3D_PT_LINELIST, -+ /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, -+ /* UNKNOWN */ VKD3D_PT_UNDEFINED, -+ /* UNKNOWN */ VKD3D_PT_UNDEFINED, -+ /* VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, -+ /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, -+}; -+ -+static const enum vkd3d_shader_resource_type resource_type_table[] = -+{ -+ /* 0 */ VKD3D_SHADER_RESOURCE_NONE, -+ /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, -+ /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, -+ /* VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, -+ /* VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, -+ /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, -+ /* 
VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, -+ /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, -+ /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, -+ /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, -+ /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, -+ /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, -+ /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, -+}; -+ -+static const enum vkd3d_data_type data_type_table[] = -+{ -+ /* 0 */ VKD3D_DATA_FLOAT, -+ /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, -+ /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, -+ /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, -+ /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, -+ /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, -+ /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, -+ /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, -+ /* VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, -+ /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, -+}; -+ -+static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) -+{ -+ return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); -+} -+ -+static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) -+{ -+ const struct vkd3d_shader_version *version = &sm4->p.shader_version; -+ -+ return version->major >= 5 && version->minor >= 1; -+} -+ -+static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, -+ const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); -+static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, -+ const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param); -+ -+static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, -+ const uint32_t 
**ptr, const uint32_t *end, unsigned int *register_space) -+{ -+ *register_space = 0; -+ -+ if (!shader_is_sm_5_1(priv)) -+ return true; -+ -+ if (*ptr >= end) -+ { -+ WARN("Invalid ptr %p >= end %p.\n", *ptr, end); -+ return false; -+ } -+ -+ *register_space = *(*ptr)++; -+ return true; -+} -+ -+static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, -+ (struct vkd3d_shader_src_param *)&ins->src[0]); -+ ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? -+ VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; -+} -+ -+static void shader_sm4_read_case_condition(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, -+ (struct vkd3d_shader_src_param *)&ins->src[0]); -+ if (ins->src[0].reg.type != VKD3DSPR_IMMCONST) -+ { -+ FIXME("Switch case value is not a 32-bit constant.\n"); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE, -+ "Switch case value is not a 32-bit immediate constant register."); -+ } -+} -+ -+static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -+ const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ struct vkd3d_shader_immediate_constant_buffer *icb; -+ enum vkd3d_sm4_shader_data_type type; -+ unsigned int icb_size; -+ -+ type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; -+ if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) -+ { -+ FIXME("Ignoring shader data type %#x.\n", type); -+ ins->handler_idx = 
VKD3DSIH_NOP; -+ return; -+ } -+ -+ ++tokens; -+ icb_size = token_count - 1; -+ if (icb_size % 4) -+ { -+ FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); -+ ins->handler_idx = VKD3DSIH_INVALID; -+ return; -+ } -+ -+ if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) -+ { -+ ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -+ ins->handler_idx = VKD3DSIH_INVALID; -+ return; -+ } -+ icb->vec4_count = icb_size / 4; -+ memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); -+ shader_instruction_array_add_icb(&priv->p.instructions, icb); -+ ins->declaration.icb = icb; -+} -+ -+static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, -+ const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) -+{ -+ range->first = reg->idx[1].offset; -+ range->last = reg->idx[shader_is_sm_5_1(sm4) ? 
2 : 1].offset; -+ if (range->last < range->first) -+ { -+ FIXME("Invalid register range [%u:%u].\n", range->first, range->last); -+ vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, -+ "Last register %u must not be less than first register %u in range.", range->last, range->first); -+ } -+} -+ -+static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; -+ enum vkd3d_sm4_resource_type resource_type; -+ const uint32_t *end = &tokens[token_count]; -+ enum vkd3d_sm4_data_type data_type; -+ enum vkd3d_data_type reg_data_type; -+ DWORD components; -+ unsigned int i; -+ -+ resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; -+ if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) -+ { -+ FIXME("Unhandled resource type %#x.\n", resource_type); -+ semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; -+ } -+ else -+ { -+ semantic->resource_type = resource_type_table[resource_type]; -+ } -+ -+ if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS -+ || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) -+ { -+ semantic->sample_count = (opcode_token & VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK) -+ >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; -+ } -+ -+ reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? 
VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; -+ shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); -+ shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); -+ -+ components = *tokens++; -+ for (i = 0; i < VKD3D_VEC4_SIZE; i++) -+ { -+ data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); -+ -+ if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) -+ { -+ FIXME("Unhandled data type %#x.\n", data_type); -+ semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; -+ } -+ else -+ { -+ semantic->resource_data_type[i] = data_type_table[data_type]; -+ } -+ } -+ -+ if (reg_data_type == VKD3D_DATA_UAV) -+ ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; -+ -+ shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); -+} -+ -+static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ const uint32_t *end = &tokens[token_count]; -+ -+ shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); -+ shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); -+ if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) -+ ins->flags |= VKD3DSI_INDEXED_DYNAMIC; -+ -+ ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; -+ ins->declaration.cb.range.space = 0; -+ -+ if (shader_is_sm_5_1(priv)) -+ { -+ if (tokens >= end) -+ { -+ FIXME("Invalid ptr %p >= end %p.\n", tokens, end); -+ return; -+ } -+ -+ ins->declaration.cb.size = *tokens++; -+ shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); -+ } -+} -+ -+static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -+ const uint32_t *tokens, unsigned int 
token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ const uint32_t *end = &tokens[token_count]; -+ -+ ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; -+ if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) -+ FIXME("Unhandled sampler mode %#x.\n", ins->flags); -+ shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); -+ shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); -+ shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); -+} -+ -+static bool sm4_parser_is_in_fork_or_join_phase(const struct vkd3d_shader_sm4_parser *sm4) -+{ -+ return sm4->phase == VKD3DSIH_HS_FORK_PHASE || sm4->phase == VKD3DSIH_HS_JOIN_PHASE; -+} -+ -+static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ struct vkd3d_shader_index_range *index_range = &ins->declaration.index_range; -+ unsigned int i, register_idx, register_count, write_mask; -+ enum vkd3d_shader_register_type type; -+ struct sm4_index_range_array *ranges; -+ unsigned int *io_masks; -+ -+ shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, -+ &index_range->dst); -+ index_range->register_count = *tokens; -+ -+ register_idx = index_range->dst.reg.idx[index_range->dst.reg.idx_count - 1].offset; -+ register_count = index_range->register_count; -+ write_mask = index_range->dst.write_mask; -+ -+ if (vkd3d_write_mask_component_count(write_mask) != 1) -+ { -+ WARN("Unhandled write mask %#x.\n", write_mask); -+ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK, -+ "Index range mask %#x is not scalar.", write_mask); -+ } -+ -+ switch ((type = index_range->dst.reg.type)) -+ { -+ case VKD3DSPR_INPUT: -+ case 
VKD3DSPR_INCONTROLPOINT: -+ io_masks = priv->input_register_masks; -+ ranges = &priv->input_index_ranges; -+ break; -+ case VKD3DSPR_OUTPUT: -+ if (sm4_parser_is_in_fork_or_join_phase(priv)) -+ { -+ io_masks = priv->patch_constant_register_masks; -+ ranges = &priv->patch_constant_index_ranges; -+ } -+ else -+ { -+ io_masks = priv->output_register_masks; -+ ranges = &priv->output_index_ranges; -+ } -+ break; -+ case VKD3DSPR_COLOROUT: -+ case VKD3DSPR_OUTCONTROLPOINT: -+ io_masks = priv->output_register_masks; -+ ranges = &priv->output_index_ranges; -+ break; -+ case VKD3DSPR_PATCHCONST: -+ io_masks = priv->patch_constant_register_masks; -+ ranges = &priv->patch_constant_index_ranges; -+ break; -+ -+ default: -+ WARN("Unhandled register type %#x.\n", type); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, -+ "Invalid register type %#x for index range base %u, count %u, mask %#x.", -+ type, register_idx, register_count, write_mask); -+ return; -+ } -+ -+ for (i = 0; i < ranges->count; ++i) -+ { -+ struct sm4_index_range r = ranges->ranges[i]; -+ -+ if (!(r.mask & write_mask)) -+ continue; -+ /* Ranges with the same base but different lengths are not an issue. 
*/ -+ if (register_idx == r.index) -+ continue; -+ -+ if ((r.index <= register_idx && register_idx - r.index < r.count) -+ || (register_idx < r.index && r.index - register_idx < register_count)) -+ { -+ WARN("Detected index range collision for base %u, count %u, mask %#x.\n", -+ register_idx, register_count, write_mask); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, -+ "Register index range base %u, count %u, mask %#x collides with a previous declaration.", -+ register_idx, register_count, write_mask); -+ return; -+ } -+ } -+ ranges->ranges[ranges->count].index = register_idx; -+ ranges->ranges[ranges->count].count = register_count; -+ ranges->ranges[ranges->count++].mask = write_mask; -+ -+ for (i = 0; i < register_count; ++i) -+ { -+ if ((io_masks[register_idx + i] & write_mask) != write_mask) -+ { -+ WARN("No matching declaration for index range base %u, count %u, mask %#x.\n", -+ register_idx, register_count, write_mask); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, -+ "Input/output registers matching index range base %u, count %u, mask %#x were not declared.", -+ register_idx, register_count, write_mask); -+ return; -+ } -+ } -+} -+ -+static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ enum vkd3d_sm4_output_primitive_type primitive_type; -+ -+ primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; -+ if (primitive_type >= ARRAY_SIZE(output_primitive_type_table)) -+ ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; -+ else -+ ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type]; -+ -+ if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) -+ FIXME("Unhandled output primitive type %#x.\n", primitive_type); -+} -+ -+static 
void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ enum vkd3d_sm4_input_primitive_type primitive_type; -+ -+ primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; -+ if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) -+ { -+ ins->declaration.primitive_type.type = VKD3D_PT_PATCH; -+ ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; -+ } -+ else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) -+ { -+ ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; -+ } -+ else -+ { -+ ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type]; -+ } -+ -+ if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) -+ FIXME("Unhandled input primitive type %#x.\n", primitive_type); -+} -+ -+static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->declaration.count = *tokens; -+ if (opcode == VKD3D_SM4_OP_DCL_TEMPS) -+ priv->p.shader_desc.temp_count = max(priv->p.shader_desc.temp_count, *tokens); -+} -+ -+static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); -+} -+ -+static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ 
shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, -+ &ins->declaration.register_semantic.reg); -+ ins->declaration.register_semantic.sysval_semantic = *tokens; -+} -+ -+static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; -+ shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); -+} -+ -+static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; -+ shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, -+ &ins->declaration.register_semantic.reg); -+ ins->declaration.register_semantic.sysval_semantic = *tokens; -+} -+ -+static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->declaration.indexable_temp.register_idx = *tokens++; -+ ins->declaration.indexable_temp.register_size = *tokens++; -+ ins->declaration.indexable_temp.component_count = *tokens; -+} -+ -+static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; -+} -+ -+static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t 
opcode_token, -+ const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ struct vkd3d_shader_src_param *src_params = (struct vkd3d_shader_src_param *)ins->src; -+ src_params[0].reg.u.fp_body_idx = *tokens++; -+ shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, &src_params[0]); -+} -+ -+static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->declaration.index = *tokens; -+} -+ -+static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->declaration.index = *tokens++; -+ FIXME("Ignoring set of function bodies (count %u).\n", *tokens); -+} -+ -+static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->declaration.fp.index = *tokens++; -+ ins->declaration.fp.body_count = *tokens++; -+ ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT; -+ ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK; -+ FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count); -+} -+ -+static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) -+ >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; -+} -+ -+static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t 
opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) -+ >> VKD3D_SM5_TESSELLATOR_SHIFT; -+} -+ -+static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) -+ >> VKD3D_SM5_TESSELLATOR_SHIFT; -+} -+ -+static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) -+ >> VKD3D_SM5_TESSELLATOR_SHIFT; -+} -+ -+static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->declaration.max_tessellation_factor = *(float *)tokens; -+} -+ -+static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->declaration.thread_group_size.x = *tokens++; -+ ins->declaration.thread_group_size.y = *tokens++; -+ ins->declaration.thread_group_size.z = *tokens++; -+} -+ -+static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -+ const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; -+ const uint32_t *end = &tokens[token_count]; -+ 
-+ shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); -+ shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); -+ ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; -+ shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -+} -+ -+static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; -+ const uint32_t *end = &tokens[token_count]; -+ -+ shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); -+ shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); -+ ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; -+ resource->byte_stride = *tokens++; -+ if (resource->byte_stride % 4) -+ FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); -+ shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -+} -+ -+static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg); -+ ins->declaration.tgsm_raw.byte_count = *tokens; -+ if (ins->declaration.tgsm_raw.byte_count % 4) -+ FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); -+} -+ -+static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct 
vkd3d_shader_sm4_parser *priv) -+{ -+ shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, -+ &ins->declaration.tgsm_structured.reg); -+ ins->declaration.tgsm_structured.byte_stride = *tokens++; -+ ins->declaration.tgsm_structured.structure_count = *tokens; -+ if (ins->declaration.tgsm_structured.byte_stride % 4) -+ FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); -+} -+ -+static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; -+ const uint32_t *end = &tokens[token_count]; -+ -+ shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); -+ shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); -+ resource->byte_stride = *tokens++; -+ if (resource->byte_stride % 4) -+ FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); -+ shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -+} -+ -+static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; -+ const uint32_t *end = &tokens[token_count]; -+ -+ shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); -+ shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); -+ shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -+} -+ -+static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t 
opcode, uint32_t opcode_token, -+ const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+{ -+ ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT; -+} -+ -+/* -+ * d -> VKD3D_DATA_DOUBLE -+ * f -> VKD3D_DATA_FLOAT -+ * i -> VKD3D_DATA_INT -+ * u -> VKD3D_DATA_UINT -+ * O -> VKD3D_DATA_OPAQUE -+ * R -> VKD3D_DATA_RESOURCE -+ * S -> VKD3D_DATA_SAMPLER -+ * U -> VKD3D_DATA_UAV -+ */ -+static const struct vkd3d_sm4_opcode_info opcode_table[] = -+{ -+ {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"}, -+ {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, -+ {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, -+ {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", -+ shader_sm4_read_conditional_op}, -+ {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u", -+ shader_sm4_read_case_condition}, -+ {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, -+ {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", -+ shader_sm4_read_conditional_op}, -+ {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, -+ {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, -+ {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, -+ {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, -+ {VKD3D_SM4_OP_DISCARD, VKD3DSIH_DISCARD, "", "u", -+ shader_sm4_read_conditional_op}, -+ {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, -+ {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, -+ {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, -+ {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"}, -+ {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""}, -+ {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""}, -+ {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""}, -+ {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""}, -+ {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""}, -+ {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"}, -+ {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"}, -+ {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"}, -+ {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"}, -+ {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"}, -+ {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", 
"ff"}, -+ {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, -+ {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", -+ shader_sm4_read_conditional_op}, -+ {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, -+ {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, -+ {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, -+ {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"}, -+ {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"}, -+ {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN, "i", "ii"}, -+ {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, -+ {VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, -+ {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, -+ {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, -+ {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, -+ {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, -+ {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, -+ {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, -+ {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, -+ {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, -+ {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, -+ {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, -+ {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, -+ {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, -+ {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, -+ {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", -+ shader_sm4_read_shader_data}, -+ {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, -+ {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, -+ {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, -+ {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, -+ {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, -+ {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, -+ {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, -+ {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, -+ {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, -+ {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", -+ shader_sm4_read_conditional_op}, -+ {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, -+ {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, -+ {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, -+ {VKD3D_SM4_OP_ROUND_Z, 
VKD3DSIH_ROUND_Z, "f", "f"}, -+ {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, -+ {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, -+ {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, -+ {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, -+ {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, -+ {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, -+ {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, -+ {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, -+ {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, -+ {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, -+ {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, -+ {VKD3D_SM4_OP_ULT, VKD3DSIH_ULT, "u", "uu"}, -+ {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, -+ {VKD3D_SM4_OP_UMUL, VKD3DSIH_UMUL, "uu", "uu"}, -+ {VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, -+ {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, -+ {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, -+ {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, -+ {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, -+ {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "", "", -+ shader_sm4_read_dcl_resource}, -+ {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", -+ shader_sm4_read_dcl_constant_buffer}, -+ {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", -+ shader_sm4_read_dcl_sampler}, -+ {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", -+ shader_sm4_read_dcl_index_range}, -+ {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", -+ shader_sm4_read_dcl_output_topology}, -+ {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", -+ shader_sm4_read_dcl_input_primitive}, -+ {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", -+ shader_sm4_read_declaration_count}, -+ {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", -+ shader_sm4_read_declaration_dst}, -+ {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", -+ 
shader_sm4_read_declaration_register_semantic}, -+ {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", -+ shader_sm4_read_declaration_register_semantic}, -+ {VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", -+ shader_sm4_read_dcl_input_ps}, -+ {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", -+ shader_sm4_read_declaration_register_semantic}, -+ {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, VKD3DSIH_DCL_INPUT_PS_SIV, "", "", -+ shader_sm4_read_dcl_input_ps_siv}, -+ {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", -+ shader_sm4_read_declaration_dst}, -+ {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", -+ shader_sm4_read_declaration_register_semantic}, -+ {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", -+ shader_sm4_read_declaration_count}, -+ {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", -+ shader_sm4_read_dcl_indexable_temp}, -+ {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", -+ shader_sm4_read_dcl_global_flags}, -+ {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, -+ {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, -+ {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, -+ {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, -+ {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, -+ {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, -+ {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, -+ {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, -+ {VKD3D_SM5_OP_EMIT_STREAM, VKD3DSIH_EMIT_STREAM, "", "f"}, -+ {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, -+ {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", -+ shader_sm5_read_fcall}, -+ {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, -+ {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, -+ {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, -+ {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, -+ {VKD3D_SM5_OP_DERIV_RTY_FINE, 
VKD3DSIH_DSY_FINE, "f", "f"}, -+ {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, -+ {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, -+ {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, -+ {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, -+ {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, -+ {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, -+ {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, -+ {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, -+ {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, -+ {VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, -+ {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, -+ {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, -+ {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, -+ {VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, -+ {VKD3D_SM5_OP_SWAPC, VKD3DSIH_SWAPC, "ff", "uff"}, -+ {VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, -+ {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", -+ shader_sm5_read_dcl_function_body}, -+ {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", -+ shader_sm5_read_dcl_function_table}, -+ {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", -+ shader_sm5_read_dcl_interface}, -+ {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", -+ shader_sm5_read_control_point_count}, -+ {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", -+ shader_sm5_read_control_point_count}, -+ {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", -+ shader_sm5_read_dcl_tessellator_domain}, -+ {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", -+ shader_sm5_read_dcl_tessellator_partitioning}, -+ {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", -+ shader_sm5_read_dcl_tessellator_output_primitive}, -+ 
{VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", -+ shader_sm5_read_dcl_hs_max_tessfactor}, -+ {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", -+ shader_sm4_read_declaration_count}, -+ {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", -+ shader_sm4_read_declaration_count}, -+ {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", -+ shader_sm5_read_dcl_thread_group}, -+ {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", -+ shader_sm4_read_dcl_resource}, -+ {VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", -+ shader_sm5_read_dcl_uav_raw}, -+ {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", -+ shader_sm5_read_dcl_uav_structured}, -+ {VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", -+ shader_sm5_read_dcl_tgsm_raw}, -+ {VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", -+ shader_sm5_read_dcl_tgsm_structured}, -+ {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", -+ shader_sm5_read_dcl_resource_raw}, -+ {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", -+ shader_sm5_read_dcl_resource_structured}, -+ {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, -+ {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, -+ {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, -+ {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, -+ {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, -+ {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, -+ {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, -+ {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, -+ {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, -+ {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, -+ {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, -+ 
{VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, -+ {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, -+ {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, -+ {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, -+ {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, -+ {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", -+ shader_sm5_read_sync}, -+ {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, -+ {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, -+ {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, -+ {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, -+ {VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, -+ {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, -+ {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, -+ {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, -+ {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, -+ {VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, -+ {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, -+ {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, -+ {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, -+ {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, -+ {VKD3D_SM5_OP_DCL_GS_INSTANCES, 
VKD3DSIH_DCL_GS_INSTANCES, "", "", -+ shader_sm4_read_declaration_count}, -+ {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, -+ {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, -+ {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, -+ {VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"}, -+ {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, -+ {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, -+ {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, -+ {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, -+ {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, -+ {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, -+ {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, -+ {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, -+ {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, -+ {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, -+ {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, -+ {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, -+ {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, -+ {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, -+ {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, -+ {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, -+ {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, -+ {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, -+ {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, -+ {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, -+}; -+ -+struct vkd3d_sm4_register_type_info -+{ -+ enum vkd3d_sm4_register_type sm4_type; -+ enum vkd3d_shader_register_type vkd3d_type; -+}; -+ -+static const enum vkd3d_shader_register_precision register_precision_table[] = -+{ -+ /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, -+ /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, -+ 
/* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, -+ /* UNKNOWN */ VKD3D_SHADER_REGISTER_PRECISION_INVALID, -+ /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, -+ /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, -+}; -+ -+struct tpf_writer -+{ -+ struct hlsl_ctx *ctx; -+ struct vkd3d_bytecode_buffer *buffer; -+ struct vkd3d_sm4_lookup_tables lookup; -+}; -+ -+static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) -+ { -+ if (opcode == opcode_table[i].opcode) -+ return &opcode_table[i]; -+ } -+ -+ return NULL; -+} -+ -+static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) -+{ -+ const struct vkd3d_sm4_register_type_info *info; -+ unsigned int i; -+ -+ static const struct vkd3d_sm4_register_type_info register_type_table[] = -+ { -+ {VKD3D_SM4_RT_TEMP, VKD3DSPR_TEMP}, -+ {VKD3D_SM4_RT_INPUT, VKD3DSPR_INPUT}, -+ {VKD3D_SM4_RT_OUTPUT, VKD3DSPR_OUTPUT}, -+ {VKD3D_SM4_RT_INDEXABLE_TEMP, VKD3DSPR_IDXTEMP}, -+ {VKD3D_SM4_RT_IMMCONST, VKD3DSPR_IMMCONST}, -+ {VKD3D_SM4_RT_IMMCONST64, VKD3DSPR_IMMCONST64}, -+ {VKD3D_SM4_RT_SAMPLER, VKD3DSPR_SAMPLER}, -+ {VKD3D_SM4_RT_RESOURCE, VKD3DSPR_RESOURCE}, -+ {VKD3D_SM4_RT_CONSTBUFFER, VKD3DSPR_CONSTBUFFER}, -+ {VKD3D_SM4_RT_IMMCONSTBUFFER, VKD3DSPR_IMMCONSTBUFFER}, -+ {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID}, -+ {VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT}, -+ {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL}, -+ {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER}, -+ {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK}, -+ {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM}, -+ {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY}, -+ {VKD3D_SM5_RT_FUNCTION_POINTER, VKD3DSPR_FUNCTIONPOINTER}, -+ {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID, VKD3DSPR_OUTPOINTID}, -+ {VKD3D_SM5_RT_FORK_INSTANCE_ID, 
VKD3DSPR_FORKINSTID}, -+ {VKD3D_SM5_RT_JOIN_INSTANCE_ID, VKD3DSPR_JOININSTID}, -+ {VKD3D_SM5_RT_INPUT_CONTROL_POINT, VKD3DSPR_INCONTROLPOINT}, -+ {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT, VKD3DSPR_OUTCONTROLPOINT}, -+ {VKD3D_SM5_RT_PATCH_CONSTANT_DATA, VKD3DSPR_PATCHCONST}, -+ {VKD3D_SM5_RT_DOMAIN_LOCATION, VKD3DSPR_TESSCOORD}, -+ {VKD3D_SM5_RT_UAV, VKD3DSPR_UAV}, -+ {VKD3D_SM5_RT_SHARED_MEMORY, VKD3DSPR_GROUPSHAREDMEM}, -+ {VKD3D_SM5_RT_THREAD_ID, VKD3DSPR_THREADID}, -+ {VKD3D_SM5_RT_THREAD_GROUP_ID, VKD3DSPR_THREADGROUPID}, -+ {VKD3D_SM5_RT_LOCAL_THREAD_ID, VKD3DSPR_LOCALTHREADID}, -+ {VKD3D_SM5_RT_COVERAGE, VKD3DSPR_COVERAGE}, -+ {VKD3D_SM5_RT_LOCAL_THREAD_INDEX, VKD3DSPR_LOCALTHREADINDEX}, -+ {VKD3D_SM5_RT_GS_INSTANCE_ID, VKD3DSPR_GSINSTID}, -+ {VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL, VKD3DSPR_DEPTHOUTGE}, -+ {VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL, VKD3DSPR_DEPTHOUTLE}, -+ {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF}, -+ }; -+ -+ memset(lookup, 0, sizeof(*lookup)); -+ -+ for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) -+ { -+ info = &register_type_table[i]; -+ lookup->register_type_info_from_sm4[info->sm4_type] = info; -+ lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; -+ } -+} -+ -+static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+{ -+ tpf->ctx = ctx; -+ tpf->buffer = buffer; -+ init_sm4_lookup_tables(&tpf->lookup); -+} -+ -+static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( -+ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type) -+{ -+ if (sm4_type >= VKD3D_SM4_REGISTER_TYPE_COUNT) -+ return NULL; -+ return lookup->register_type_info_from_sm4[sm4_type]; -+} -+ -+static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_type( -+ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) -+{ -+ if (vkd3d_type >= VKD3DSPR_COUNT) -+ return NULL; -+ return 
lookup->register_type_info_from_vkd3d[vkd3d_type]; -+} -+ -+static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) -+{ -+ switch (sm4->p.shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_PIXEL: -+ if (reg->type == VKD3DSPR_OUTPUT) -+ { -+ unsigned int reg_idx = reg->idx[0].offset; -+ -+ if (reg_idx >= ARRAY_SIZE(sm4->output_map)) -+ { -+ /* Validated later */ -+ break; -+ } -+ -+ reg->type = VKD3DSPR_COLOROUT; -+ reg->idx[0].offset = sm4->output_map[reg_idx]; -+ } -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+static enum vkd3d_data_type map_data_type(char t) -+{ -+ switch (t) -+ { -+ case 'd': -+ return VKD3D_DATA_DOUBLE; -+ case 'f': -+ return VKD3D_DATA_FLOAT; -+ case 'i': -+ return VKD3D_DATA_INT; -+ case 'u': -+ return VKD3D_DATA_UINT; -+ case 'O': -+ return VKD3D_DATA_OPAQUE; -+ case 'R': -+ return VKD3D_DATA_RESOURCE; -+ case 'S': -+ return VKD3D_DATA_SAMPLER; -+ case 'U': -+ return VKD3D_DATA_UAV; -+ default: -+ ERR("Invalid data type '%c'.\n", t); -+ return VKD3D_DATA_FLOAT; -+ } -+} -+ -+static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) -+{ -+ struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); -+ -+ shader_instruction_array_destroy(&parser->instructions); -+ free_shader_desc(&parser->shader_desc); -+ vkd3d_free(sm4); -+} -+ -+static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, -+ const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) -+{ -+ if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) -+ { -+ struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&priv->p, 1); -+ -+ if (!(reg_idx->rel_addr = rel_addr)) -+ { -+ ERR("Failed to get src param for relative addressing.\n"); -+ return false; -+ } -+ -+ if (addressing & VKD3D_SM4_ADDRESSING_OFFSET) -+ reg_idx->offset = *(*ptr)++; -+ else -+ reg_idx->offset = 0; -+ shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr); 
-+ } -+ else -+ { -+ reg_idx->rel_addr = NULL; -+ reg_idx->offset = *(*ptr)++; -+ } -+ -+ return true; -+} -+ -+static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_type) -+{ -+ switch (register_type) -+ { -+ case VKD3D_SM4_RT_SAMPLER: -+ case VKD3D_SM4_RT_RESOURCE: -+ case VKD3D_SM4_RT_CONSTBUFFER: -+ case VKD3D_SM5_RT_UAV: -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, -+ enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) -+{ -+ const struct vkd3d_sm4_register_type_info *register_type_info; -+ enum vkd3d_sm4_register_precision precision; -+ enum vkd3d_sm4_register_type register_type; -+ enum vkd3d_sm4_extended_operand_type type; -+ enum vkd3d_sm4_register_modifier m; -+ uint32_t token, order, extended; -+ -+ if (*ptr >= end) -+ { -+ WARN("Invalid ptr %p >= end %p.\n", *ptr, end); -+ return false; -+ } -+ token = *(*ptr)++; -+ -+ register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; -+ register_type_info = get_info_from_sm4_register_type(&priv->lookup, register_type); -+ if (!register_type_info) -+ { -+ FIXME("Unhandled register type %#x.\n", register_type); -+ param->type = VKD3DSPR_TEMP; -+ } -+ else -+ { -+ param->type = register_type_info->vkd3d_type; -+ } -+ param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; -+ param->non_uniform = false; -+ param->data_type = data_type; -+ -+ *modifier = VKD3DSPSM_NONE; -+ if (token & VKD3D_SM4_EXTENDED_OPERAND) -+ { -+ if (*ptr >= end) -+ { -+ WARN("Invalid ptr %p >= end %p.\n", *ptr, end); -+ return false; -+ } -+ extended = *(*ptr)++; -+ -+ if (extended & VKD3D_SM4_EXTENDED_OPERAND) -+ { -+ FIXME("Skipping second-order extended operand.\n"); -+ *ptr += *ptr < end; -+ } -+ -+ type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK; -+ if (type == 
VKD3D_SM4_EXTENDED_OPERAND_MODIFIER) -+ { -+ m = (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT; -+ switch (m) -+ { -+ case VKD3D_SM4_REGISTER_MODIFIER_NEGATE: -+ *modifier = VKD3DSPSM_NEG; -+ break; -+ -+ case VKD3D_SM4_REGISTER_MODIFIER_ABS: -+ *modifier = VKD3DSPSM_ABS; -+ break; -+ -+ case VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE: -+ *modifier = VKD3DSPSM_ABSNEG; -+ break; -+ -+ default: -+ FIXME("Unhandled register modifier %#x.\n", m); -+ /* fall-through */ -+ case VKD3D_SM4_REGISTER_MODIFIER_NONE: -+ break; -+ } -+ -+ precision = (extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT; -+ if (precision >= ARRAY_SIZE(register_precision_table) -+ || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID) -+ { -+ FIXME("Unhandled register precision %#x.\n", precision); -+ param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID; -+ } -+ else -+ { -+ param->precision = register_precision_table[precision]; -+ } -+ -+ if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK) -+ param->non_uniform = true; -+ -+ extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK -+ | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK -+ | VKD3D_SM4_EXTENDED_OPERAND); -+ if (extended) -+ FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended); -+ } -+ else if (type) -+ { -+ FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type); -+ } -+ } -+ -+ order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT; -+ -+ if (order < 1) -+ { -+ param->idx[0].offset = ~0u; -+ param->idx[0].rel_addr = NULL; -+ } -+ else -+ { -+ DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> VKD3D_SM4_ADDRESSING_SHIFT0; -+ if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[0]))) -+ { -+ ERR("Failed to read register index.\n"); -+ return false; -+ } -+ } -+ -+ if (order < 2) -+ { -+ 
param->idx[1].offset = ~0u; -+ param->idx[1].rel_addr = NULL; -+ } -+ else -+ { -+ DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1; -+ if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[1]))) -+ { -+ ERR("Failed to read register index.\n"); -+ return false; -+ } -+ } -+ -+ if (order < 3) -+ { -+ param->idx[2].offset = ~0u; -+ param->idx[2].rel_addr = NULL; -+ } -+ else -+ { -+ DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2; -+ if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[2]))) -+ { -+ ERR("Failed to read register index.\n"); -+ return false; -+ } -+ } -+ -+ if (order > 3) -+ { -+ WARN("Unhandled order %u.\n", order); -+ return false; -+ } -+ -+ param->idx_count = order; -+ -+ if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64) -+ { -+ enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT; -+ unsigned int dword_count; -+ -+ switch (dimension) -+ { -+ case VKD3D_SM4_DIMENSION_SCALAR: -+ param->immconst_type = VKD3D_IMMCONST_SCALAR; -+ dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64); -+ if (end - *ptr < dword_count) -+ { -+ WARN("Invalid ptr %p, end %p.\n", *ptr, end); -+ return false; -+ } -+ memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD)); -+ *ptr += dword_count; -+ break; -+ -+ case VKD3D_SM4_DIMENSION_VEC4: -+ param->immconst_type = VKD3D_IMMCONST_VEC4; -+ if (end - *ptr < VKD3D_VEC4_SIZE) -+ { -+ WARN("Invalid ptr %p, end %p.\n", *ptr, end); -+ return false; -+ } -+ memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD)); -+ *ptr += 4; -+ break; -+ -+ default: -+ FIXME("Unhandled dimension %#x.\n", dimension); -+ break; -+ } -+ } -+ else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type)) -+ { -+ /* SM5.1 places a symbol identifier in idx[0] and moves -+ * other values up one slot. Normalize to SM5.1. 
*/ -+ param->idx[2] = param->idx[1]; -+ param->idx[1] = param->idx[0]; -+ ++param->idx_count; -+ } -+ -+ map_register(priv, param); -+ -+ return true; -+} -+ -+static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) -+{ -+ switch (reg->type) -+ { -+ case VKD3DSPR_COVERAGE: -+ case VKD3DSPR_DEPTHOUT: -+ case VKD3DSPR_DEPTHOUTGE: -+ case VKD3DSPR_DEPTHOUTLE: -+ case VKD3DSPR_GSINSTID: -+ case VKD3DSPR_LOCALTHREADINDEX: -+ case VKD3DSPR_OUTPOINTID: -+ case VKD3DSPR_PRIMID: -+ case VKD3DSPR_SAMPLEMASK: -+ case VKD3DSPR_OUTSTENCILREF: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static uint32_t swizzle_from_sm4(uint32_t s) -+{ -+ return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3); -+} -+ -+static bool register_is_input_output(const struct vkd3d_shader_register *reg) -+{ -+ switch (reg->type) -+ { -+ case VKD3DSPR_INPUT: -+ case VKD3DSPR_OUTPUT: -+ case VKD3DSPR_COLOROUT: -+ case VKD3DSPR_INCONTROLPOINT: -+ case VKD3DSPR_OUTCONTROLPOINT: -+ case VKD3DSPR_PATCHCONST: -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+static bool register_is_control_point_input(const struct vkd3d_shader_register *reg, -+ const struct vkd3d_shader_sm4_parser *priv) -+{ -+ return reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT -+ || (reg->type == VKD3DSPR_INPUT && (priv->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE -+ || priv->p.shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY)); -+} -+ -+static unsigned int mask_from_swizzle(unsigned int swizzle) -+{ -+ return (1u << vkd3d_swizzle_get_component(swizzle, 0)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 1)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 2)) -+ | (1u << vkd3d_swizzle_get_component(swizzle, 3)); -+} -+ -+static bool shader_sm4_validate_input_output_register(struct vkd3d_shader_sm4_parser *priv, -+ const struct vkd3d_shader_register *reg, unsigned int mask) -+{ -+ unsigned int idx_count = 1 + 
register_is_control_point_input(reg, priv); -+ const unsigned int *masks; -+ unsigned int register_idx; -+ -+ if (reg->idx_count != idx_count) -+ { -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_INDEX_COUNT, -+ "Invalid index count %u for register type %#x; expected count %u.", -+ reg->idx_count, reg->type, idx_count); -+ return false; -+ } -+ -+ switch (reg->type) -+ { -+ case VKD3DSPR_INPUT: -+ case VKD3DSPR_INCONTROLPOINT: -+ masks = priv->input_register_masks; -+ break; -+ case VKD3DSPR_OUTPUT: -+ masks = sm4_parser_is_in_fork_or_join_phase(priv) ? priv->patch_constant_register_masks -+ : priv->output_register_masks; -+ break; -+ case VKD3DSPR_COLOROUT: -+ case VKD3DSPR_OUTCONTROLPOINT: -+ masks = priv->output_register_masks; -+ break; -+ case VKD3DSPR_PATCHCONST: -+ masks = priv->patch_constant_register_masks; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ register_idx = reg->idx[reg->idx_count - 1].offset; -+ /* The signature element registers have already been checked against MAX_REG_OUTPUT. 
*/ -+ if (register_idx >= MAX_REG_OUTPUT || (masks[register_idx] & mask) != mask) -+ { -+ WARN("Failed to find signature element for register type %#x, index %u and mask %#x.\n", -+ reg->type, register_idx, mask); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER, -+ "Could not find signature element matching register type %#x, index %u and mask %#x.", -+ reg->type, register_idx, mask); -+ return false; -+ } -+ -+ return true; -+} -+ -+static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, -+ const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) -+{ -+ unsigned int dimension, mask; -+ DWORD token; -+ -+ if (*ptr >= end) -+ { -+ WARN("Invalid ptr %p >= end %p.\n", *ptr, end); -+ return false; -+ } -+ token = **ptr; -+ -+ if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers)) -+ { -+ ERR("Failed to read parameter.\n"); -+ return false; -+ } -+ -+ switch ((dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT)) -+ { -+ case VKD3D_SM4_DIMENSION_NONE: -+ case VKD3D_SM4_DIMENSION_SCALAR: -+ src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ break; -+ -+ case VKD3D_SM4_DIMENSION_VEC4: -+ { -+ enum vkd3d_sm4_swizzle_type swizzle_type = -+ (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -+ -+ switch (swizzle_type) -+ { -+ case VKD3D_SM4_SWIZZLE_NONE: -+ src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ -+ mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; -+ /* Mask seems only to be used for vec4 constants and is always zero. 
*/ -+ if (!register_is_constant(&src_param->reg)) -+ { -+ FIXME("Source mask %#x is not for a constant.\n", mask); -+ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, -+ "Unhandled mask %#x for a non-constant source register.", mask); -+ } -+ else if (mask) -+ { -+ FIXME("Unhandled mask %#x.\n", mask); -+ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK, -+ "Unhandled source register mask %#x.", mask); -+ } -+ -+ break; -+ -+ case VKD3D_SM4_SWIZZLE_SCALAR: -+ src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; -+ src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; -+ break; -+ -+ case VKD3D_SM4_SWIZZLE_VEC4: -+ src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); -+ break; -+ -+ default: -+ FIXME("Unhandled swizzle type %#x.\n", swizzle_type); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE, -+ "Source register swizzle type %#x is invalid.", swizzle_type); -+ break; -+ } -+ break; -+ } -+ -+ default: -+ FIXME("Unhandled dimension %#x.\n", dimension); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION, -+ "Source register dimension %#x is invalid.", dimension); -+ break; -+ } -+ -+ if (register_is_input_output(&src_param->reg) && !shader_sm4_validate_input_output_register(priv, -+ &src_param->reg, mask_from_swizzle(src_param->swizzle))) -+ return false; -+ -+ return true; -+} -+ -+static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, -+ const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) -+{ -+ enum vkd3d_sm4_swizzle_type swizzle_type; -+ enum vkd3d_shader_src_modifier modifier; -+ unsigned int dimension, swizzle; -+ DWORD token; -+ -+ if (*ptr >= end) -+ { -+ WARN("Invalid ptr %p >= end %p.\n", *ptr, end); -+ return false; -+ } -+ token = 
**ptr; -+ -+ if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier)) -+ { -+ ERR("Failed to read parameter.\n"); -+ return false; -+ } -+ -+ if (modifier != VKD3DSPSM_NONE) -+ { -+ ERR("Invalid source modifier %#x on destination register.\n", modifier); -+ return false; -+ } -+ -+ switch ((dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT)) -+ { -+ case VKD3D_SM4_DIMENSION_NONE: -+ dst_param->write_mask = 0; -+ break; -+ -+ case VKD3D_SM4_DIMENSION_SCALAR: -+ dst_param->write_mask = VKD3DSP_WRITEMASK_0; -+ break; -+ -+ case VKD3D_SM4_DIMENSION_VEC4: -+ swizzle_type = (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -+ switch (swizzle_type) -+ { -+ case VKD3D_SM4_SWIZZLE_NONE: -+ dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; -+ break; -+ -+ case VKD3D_SM4_SWIZZLE_VEC4: -+ swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); -+ if (swizzle != VKD3D_SHADER_NO_SWIZZLE) -+ { -+ FIXME("Unhandled swizzle %#x.\n", swizzle); -+ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_SWIZZLE, -+ "Unhandled destination register swizzle %#x.", swizzle); -+ } -+ dst_param->write_mask = VKD3DSP_WRITEMASK_ALL; -+ break; -+ -+ default: -+ FIXME("Unhandled swizzle type %#x.\n", swizzle_type); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE, -+ "Destination register swizzle type %#x is invalid.", swizzle_type); -+ break; -+ } -+ break; -+ -+ default: -+ FIXME("Unhandled dimension %#x.\n", dimension); -+ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION, -+ "Destination register dimension %#x is invalid.", dimension); -+ break; -+ } -+ -+ if (data_type == VKD3D_DATA_DOUBLE) -+ dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); -+ /* Some scalar registers are declared with no write mask in shader bytecode. 
*/ -+ if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) -+ dst_param->write_mask = VKD3DSP_WRITEMASK_0; -+ dst_param->modifiers = 0; -+ dst_param->shift = 0; -+ -+ if (register_is_input_output(&dst_param->reg) && !shader_sm4_validate_input_output_register(priv, -+ &dst_param->reg, dst_param->write_mask)) -+ return false; -+ -+ return true; -+} -+ -+static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins) -+{ -+ enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK; -+ -+ switch (modifier_type) -+ { -+ case VKD3D_SM4_MODIFIER_AOFFIMMI: -+ { -+ static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER -+ | VKD3D_SM4_MODIFIER_MASK -+ | VKD3D_SM4_AOFFIMMI_U_MASK -+ | VKD3D_SM4_AOFFIMMI_V_MASK -+ | VKD3D_SM4_AOFFIMMI_W_MASK; -+ -+ /* Bit fields are used for sign extension. */ -+ struct -+ { -+ int u : 4; -+ int v : 4; -+ int w : 4; -+ } aoffimmi; -+ -+ if (modifier & ~recognized_bits) -+ FIXME("Unhandled instruction modifier %#x.\n", modifier); -+ -+ aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT; -+ aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT; -+ aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT; -+ ins->texel_offset.u = aoffimmi.u; -+ ins->texel_offset.v = aoffimmi.v; -+ ins->texel_offset.w = aoffimmi.w; -+ break; -+ } -+ -+ case VKD3D_SM5_MODIFIER_DATA_TYPE: -+ { -+ DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT; -+ unsigned int i; -+ -+ for (i = 0; i < VKD3D_VEC4_SIZE; i++) -+ { -+ enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); -+ -+ if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) -+ { -+ FIXME("Unhandled data type %#x.\n", data_type); -+ ins->resource_data_type[i] = VKD3D_DATA_FLOAT; -+ } -+ else -+ { -+ ins->resource_data_type[i] = 
data_type_table[data_type]; -+ } -+ } -+ break; -+ } -+ -+ case VKD3D_SM5_MODIFIER_RESOURCE_TYPE: -+ { -+ enum vkd3d_sm4_resource_type resource_type -+ = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT; -+ -+ if (resource_type == VKD3D_SM4_RESOURCE_RAW_BUFFER) -+ ins->raw = true; -+ else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER) -+ ins->structured = true; -+ -+ if (resource_type < ARRAY_SIZE(resource_type_table)) -+ ins->resource_type = resource_type_table[resource_type]; -+ else -+ { -+ FIXME("Unhandled resource type %#x.\n", resource_type); -+ ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; -+ } -+ -+ ins->resource_stride -+ = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT; -+ break; -+ } -+ -+ default: -+ FIXME("Unhandled instruction modifier %#x.\n", modifier); -+ } -+} -+ -+static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_instruction *ins) -+{ -+ const struct vkd3d_sm4_opcode_info *opcode_info; -+ uint32_t opcode_token, opcode, previous_token; -+ struct vkd3d_shader_dst_param *dst_params; -+ struct vkd3d_shader_src_param *src_params; -+ const uint32_t **ptr = &sm4->ptr; -+ unsigned int i, len; -+ size_t remaining; -+ const uint32_t *p; -+ DWORD precise; -+ -+ if (*ptr >= sm4->end) -+ { -+ WARN("End of byte-code, failed to read opcode.\n"); -+ goto fail; -+ } -+ remaining = sm4->end - *ptr; -+ -+ ++sm4->p.location.line; -+ -+ opcode_token = *(*ptr)++; -+ opcode = opcode_token & VKD3D_SM4_OPCODE_MASK; -+ -+ len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); -+ if (!len) -+ { -+ if (remaining < 2) -+ { -+ WARN("End of byte-code, failed to read length token.\n"); -+ goto fail; -+ } -+ len = **ptr; -+ } -+ if (!len || remaining < len) -+ { -+ WARN("Read invalid length %u (remaining %zu).\n", len, remaining); -+ goto fail; -+ } -+ --len; -+ -+ if (!(opcode_info 
= get_opcode_info(opcode))) -+ { -+ FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); -+ ins->handler_idx = VKD3DSIH_INVALID; -+ *ptr += len; -+ return; -+ } -+ -+ ins->handler_idx = opcode_info->handler_idx; -+ if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE -+ || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) -+ sm4->phase = ins->handler_idx; -+ sm4->has_control_point_phase |= ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE; -+ ins->flags = 0; -+ ins->coissue = false; -+ ins->raw = false; -+ ins->structured = false; -+ ins->predicate = NULL; -+ ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); -+ ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); -+ ins->src = src_params = shader_parser_get_src_params(&sm4->p, ins->src_count); -+ if (!src_params && ins->src_count) -+ { -+ ERR("Failed to allocate src parameters.\n"); -+ vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -+ ins->handler_idx = VKD3DSIH_INVALID; -+ return; -+ } -+ ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; -+ ins->resource_stride = 0; -+ ins->resource_data_type[0] = VKD3D_DATA_FLOAT; -+ ins->resource_data_type[1] = VKD3D_DATA_FLOAT; -+ ins->resource_data_type[2] = VKD3D_DATA_FLOAT; -+ ins->resource_data_type[3] = VKD3D_DATA_FLOAT; -+ memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); -+ -+ p = *ptr; -+ *ptr += len; -+ -+ if (opcode_info->read_opcode_func) -+ { -+ ins->dst = NULL; -+ ins->dst_count = 0; -+ opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4); -+ } -+ else -+ { -+ enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE; -+ -+ previous_token = opcode_token; -+ while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr) -+ shader_sm4_read_instruction_modifier(previous_token = *p++, ins); -+ -+ ins->flags = (opcode_token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> 
VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) -+ { -+ ins->flags &= ~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE; -+ instruction_dst_modifier = VKD3DSPDM_SATURATE; -+ } -+ precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT; -+ ins->flags |= precise << VKD3DSI_PRECISE_SHIFT; -+ -+ ins->dst = dst_params = shader_parser_get_dst_params(&sm4->p, ins->dst_count); -+ if (!dst_params && ins->dst_count) -+ { -+ ERR("Failed to allocate dst parameters.\n"); -+ vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -+ ins->handler_idx = VKD3DSIH_INVALID; -+ return; -+ } -+ for (i = 0; i < ins->dst_count; ++i) -+ { -+ if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), -+ &dst_params[i]))) -+ { -+ ins->handler_idx = VKD3DSIH_INVALID; -+ return; -+ } -+ dst_params[i].modifiers |= instruction_dst_modifier; -+ } -+ -+ for (i = 0; i < ins->src_count; ++i) -+ { -+ if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), -+ &src_params[i]))) -+ { -+ ins->handler_idx = VKD3DSIH_INVALID; -+ return; -+ } -+ } -+ } -+ -+ return; -+ -+fail: -+ *ptr = sm4->end; -+ ins->handler_idx = VKD3DSIH_INVALID; -+ return; -+} -+ -+static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = -+{ -+ .parser_destroy = shader_sm4_destroy, -+}; -+ -+static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, -+ size_t byte_code_size, const char *source_name, const struct shader_signature *output_signature, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_shader_version version; -+ uint32_t version_token, token_count; -+ unsigned int i; -+ -+ if (byte_code_size / sizeof(*byte_code) < 2) -+ { -+ WARN("Invalid byte code size %lu.\n", (long)byte_code_size); -+ return false; -+ } -+ -+ version_token = byte_code[0]; -+ TRACE("Version: 0x%08x.\n", version_token); -+ token_count = 
byte_code[1]; -+ TRACE("Token count: %u.\n", token_count); -+ -+ if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count) -+ { -+ WARN("Invalid token count %u.\n", token_count); -+ return false; -+ } -+ -+ sm4->start = &byte_code[2]; -+ sm4->end = &byte_code[token_count]; -+ -+ switch (version_token >> 16) -+ { -+ case VKD3D_SM4_PS: -+ version.type = VKD3D_SHADER_TYPE_PIXEL; -+ break; -+ -+ case VKD3D_SM4_VS: -+ version.type = VKD3D_SHADER_TYPE_VERTEX; -+ break; -+ -+ case VKD3D_SM4_GS: -+ version.type = VKD3D_SHADER_TYPE_GEOMETRY; -+ break; -+ -+ case VKD3D_SM5_HS: -+ version.type = VKD3D_SHADER_TYPE_HULL; -+ break; -+ -+ case VKD3D_SM5_DS: -+ version.type = VKD3D_SHADER_TYPE_DOMAIN; -+ break; -+ -+ case VKD3D_SM5_CS: -+ version.type = VKD3D_SHADER_TYPE_COMPUTE; -+ break; -+ -+ default: -+ FIXME("Unrecognised shader type %#x.\n", version_token >> 16); -+ } -+ version.major = VKD3D_SM4_VERSION_MAJOR(version_token); -+ version.minor = VKD3D_SM4_VERSION_MINOR(version_token); -+ -+ /* Estimate instruction count to avoid reallocation in most shaders. 
*/ -+ if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, -+ token_count / 7u + 20)) -+ return false; -+ sm4->ptr = sm4->start; -+ -+ memset(sm4->output_map, 0xff, sizeof(sm4->output_map)); -+ for (i = 0; i < output_signature->element_count; ++i) -+ { -+ struct signature_element *e = &output_signature->elements[i]; -+ -+ if (version.type == VKD3D_SHADER_TYPE_PIXEL -+ && ascii_strcasecmp(e->semantic_name, "SV_Target")) -+ continue; -+ if (e->register_index >= ARRAY_SIZE(sm4->output_map)) -+ { -+ WARN("Invalid output index %u.\n", e->register_index); -+ continue; -+ } -+ -+ sm4->output_map[e->register_index] = e->semantic_index; -+ } -+ -+ init_sm4_lookup_tables(&sm4->lookup); -+ -+ return true; -+} -+ -+static bool shader_sm4_parser_validate_signature(struct vkd3d_shader_sm4_parser *sm4, -+ const struct shader_signature *signature, unsigned int *masks, const char *name) -+{ -+ unsigned int i, register_idx, register_count, mask; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ register_idx = signature->elements[i].register_index; -+ register_count = signature->elements[i].register_count; -+ if (register_idx != ~0u && (register_idx >= MAX_REG_OUTPUT || MAX_REG_OUTPUT - register_idx < register_count)) -+ { -+ WARN("%s signature element %u unhandled register index %u, count %u.\n", -+ name, i, register_idx, register_count); -+ vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS, -+ "%s signature element %u register index %u, count %u exceeds maximum index of %u.", name, -+ i, register_idx, register_count, MAX_REG_OUTPUT - 1); -+ return false; -+ } -+ -+ if (!vkd3d_bitmask_is_contiguous(mask = signature->elements[i].mask)) -+ { -+ WARN("%s signature element %u mask %#x is not contiguous.\n", name, i, mask); -+ vkd3d_shader_parser_warning(&sm4->p, VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS, -+ "%s signature element %u mask %#x is not contiguous.", name, i, mask); -+ } -+ -+ if 
(register_idx != ~0u) -+ masks[register_idx] |= mask; -+ } -+ -+ return true; -+} -+ -+static int index_range_compare(const void *a, const void *b) -+{ -+ return memcmp(a, b, sizeof(struct sm4_index_range)); -+} -+ -+static void shader_sm4_validate_default_phase_index_ranges(struct vkd3d_shader_sm4_parser *sm4) -+{ -+ if (!sm4->input_index_ranges.count || !sm4->output_index_ranges.count) -+ return; -+ -+ if (sm4->input_index_ranges.count == sm4->output_index_ranges.count) -+ { -+ qsort(sm4->input_index_ranges.ranges, sm4->input_index_ranges.count, sizeof(sm4->input_index_ranges.ranges[0]), -+ index_range_compare); -+ qsort(sm4->output_index_ranges.ranges, sm4->output_index_ranges.count, sizeof(sm4->output_index_ranges.ranges[0]), -+ index_range_compare); -+ if (!memcmp(sm4->input_index_ranges.ranges, sm4->output_index_ranges.ranges, -+ sm4->input_index_ranges.count * sizeof(sm4->input_index_ranges.ranges[0]))) -+ return; -+ } -+ -+ /* This is very unlikely to occur and would complicate the default control point phase implementation. 
*/ -+ WARN("Default phase index ranges are not identical.\n"); -+ vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, -+ "Default control point phase input and output index range declarations are not identical."); -+ return; -+} -+ -+int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) -+{ -+ struct vkd3d_shader_instruction_array *instructions; -+ struct vkd3d_shader_desc *shader_desc; -+ struct vkd3d_shader_instruction *ins; -+ struct vkd3d_shader_sm4_parser *sm4; -+ int ret; -+ -+ if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) -+ { -+ ERR("Failed to allocate parser.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ shader_desc = &sm4->p.shader_desc; -+ shader_desc->is_dxil = false; -+ if ((ret = shader_extract_from_dxbc(&compile_info->source, -+ message_context, compile_info->source_name, shader_desc)) < 0) -+ { -+ WARN("Failed to extract shader, vkd3d result %d.\n", ret); -+ vkd3d_free(sm4); -+ return ret; -+ } -+ -+ if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, -+ compile_info->source_name, &shader_desc->output_signature, message_context)) -+ { -+ WARN("Failed to initialise shader parser.\n"); -+ free_shader_desc(shader_desc); -+ vkd3d_free(sm4); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ -+ if (!shader_sm4_parser_validate_signature(sm4, &shader_desc->input_signature, -+ sm4->input_register_masks, "Input") -+ || !shader_sm4_parser_validate_signature(sm4, &shader_desc->output_signature, -+ sm4->output_register_masks, "Output") -+ || !shader_sm4_parser_validate_signature(sm4, &shader_desc->patch_constant_signature, -+ sm4->patch_constant_register_masks, "Patch constant")) -+ { -+ shader_sm4_destroy(&sm4->p); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ instructions = &sm4->p.instructions; -+ while (sm4->ptr != sm4->end) -+ { -+ if 
(!shader_instruction_array_reserve(instructions, instructions->count + 1)) -+ { -+ ERR("Failed to allocate instructions.\n"); -+ vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -+ shader_sm4_destroy(&sm4->p); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ ins = &instructions->elements[instructions->count]; -+ shader_sm4_read_instruction(sm4, ins); -+ -+ if (ins->handler_idx == VKD3DSIH_INVALID) -+ { -+ WARN("Encountered unrecognized or invalid instruction.\n"); -+ shader_sm4_destroy(&sm4->p); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ ++instructions->count; -+ } -+ if (sm4->p.shader_version.type == VKD3D_SHADER_TYPE_HULL && !sm4->has_control_point_phase && !sm4->p.failed) -+ shader_sm4_validate_default_phase_index_ranges(sm4); -+ -+ *parser = &sm4->p; -+ -+ return sm4->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; -+} -+ -+static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); -+ -+static bool type_is_integer(const struct hlsl_type *type) -+{ -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -+ bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) -+{ -+ unsigned int i; -+ -+ static const struct -+ { -+ const char *semantic; -+ bool output; -+ enum vkd3d_shader_type shader_type; -+ enum vkd3d_sm4_swizzle_type swizzle_type; -+ enum vkd3d_shader_register_type type; -+ bool has_idx; -+ } -+ register_table[] = -+ { -+ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADID, false}, -+ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADGROUPID, false}, -+ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, 
VKD3DSPR_LOCALTHREADID, false}, -+ -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3DSPR_PRIMID, false}, -+ -+ /* Put sv_target in this table, instead of letting it fall through to -+ * default varying allocation, so that the register index matches the -+ * usage index. */ -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, -+ }; -+ -+ for (i = 0; i < ARRAY_SIZE(register_table); ++i) -+ { -+ if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) -+ && output == register_table[i].output -+ && ctx->profile->type == register_table[i].shader_type) -+ { -+ if (type) -+ *type = register_table[i].type; -+ if (swizzle_type) -+ *swizzle_type = register_table[i].swizzle_type; -+ *has_idx = register_table[i].has_idx; -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -+ bool output, D3D_NAME *usage) -+{ -+ unsigned int i; -+ -+ static const struct -+ { -+ const char *name; -+ bool output; -+ enum vkd3d_shader_type shader_type; -+ D3DDECLUSAGE usage; -+ } -+ semantics[] = -+ { -+ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, -+ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, -+ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, -+ -+ {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, -+ -+ {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, 
D3D_NAME_POSITION}, -+ {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, -+ -+ {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, -+ {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_IS_FRONT_FACE}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, -+ -+ {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, -+ {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, -+ -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, -+ }; -+ -+ for (i = 0; i < ARRAY_SIZE(semantics); ++i) -+ { -+ if (!ascii_strcasecmp(semantic->name, semantics[i].name) -+ && output == semantics[i].output -+ && ctx->profile->type == semantics[i].shader_type -+ && !ascii_strncasecmp(semantic->name, "sv_", 3)) -+ { -+ *usage = semantics[i].usage; -+ return true; -+ } -+ } -+ -+ if (!ascii_strncasecmp(semantic->name, "sv_", 3)) -+ return false; -+ -+ *usage = D3D_NAME_UNDEFINED; -+ return true; -+} -+ -+static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, -+ uint32_t tag, struct vkd3d_bytecode_buffer *buffer) -+{ -+ /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN -+ * sections to be aligned. Without this, the sections themselves will be -+ * aligned, but their reported sizes won't. 
*/ -+ size_t size = bytecode_align(buffer); -+ -+ dxbc_writer_add_section(dxbc, tag, buffer->data, size); -+ -+ if (buffer->status < 0) -+ ctx->result = buffer->status; -+} -+ -+static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) -+{ -+ struct vkd3d_bytecode_buffer buffer = {0}; -+ struct vkd3d_string_buffer *string; -+ const struct hlsl_ir_var *var; -+ size_t count_position; -+ unsigned int i; -+ bool ret; -+ -+ count_position = put_u32(&buffer, 0); -+ put_u32(&buffer, 8); /* unknown */ -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; -+ uint32_t usage_idx, reg_idx; -+ D3D_NAME usage; -+ bool has_idx; -+ -+ if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) -+ continue; -+ -+ ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -+ assert(ret); -+ if (usage == ~0u) -+ continue; -+ usage_idx = var->semantic.index; -+ -+ if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, NULL, &has_idx)) -+ { -+ reg_idx = has_idx ? var->semantic.index : ~0u; -+ } -+ else -+ { -+ assert(var->regs[HLSL_REGSET_NUMERIC].allocated); -+ reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; -+ } -+ -+ use_mask = width; /* FIXME: accurately report use mask */ -+ if (output) -+ use_mask = 0xf ^ use_mask; -+ -+ /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). 
*/ -+ if (usage >= 64) -+ usage = 0; -+ -+ put_u32(&buffer, 0); /* name */ -+ put_u32(&buffer, usage_idx); -+ put_u32(&buffer, usage); -+ switch (var->data_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); -+ break; -+ -+ case HLSL_TYPE_INT: -+ put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_UINT: -+ put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); -+ break; -+ -+ default: -+ if ((string = hlsl_type_to_string(ctx, var->data_type))) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Invalid data type %s for semantic variable %s.", string->buffer, var->name); -+ hlsl_release_string_buffer(ctx, string); -+ put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); -+ } -+ put_u32(&buffer, reg_idx); -+ put_u32(&buffer, vkd3d_make_u16(width, use_mask)); -+ } -+ -+ i = 0; -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ const char *semantic = var->semantic.name; -+ size_t string_offset; -+ D3D_NAME usage; -+ -+ if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) -+ continue; -+ -+ hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -+ if (usage == ~0u) -+ continue; -+ -+ if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) -+ string_offset = put_string(&buffer, "SV_Target"); -+ else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) -+ string_offset = put_string(&buffer, "SV_Depth"); -+ else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) -+ string_offset = put_string(&buffer, "SV_Position"); -+ else -+ string_offset = put_string(&buffer, semantic); -+ set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); -+ } -+ -+ set_u32(&buffer, count_position, i); -+ -+ add_section(ctx, dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); -+} -+ -+static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) -+{ -+ switch (type->class) -+ { -+ case HLSL_CLASS_ARRAY: -+ return sm4_class(type->e.array.type); -+ case HLSL_CLASS_MATRIX: -+ assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) -+ return D3D_SVC_MATRIX_COLUMNS; -+ else -+ return D3D_SVC_MATRIX_ROWS; -+ case HLSL_CLASS_OBJECT: -+ return D3D_SVC_OBJECT; -+ case HLSL_CLASS_SCALAR: -+ return D3D_SVC_SCALAR; -+ case HLSL_CLASS_STRUCT: -+ return D3D_SVC_STRUCT; -+ case HLSL_CLASS_VECTOR: -+ return D3D_SVC_VECTOR; -+ default: -+ ERR("Invalid class %#x.\n", type->class); -+ vkd3d_unreachable(); -+ } -+} -+ -+static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) -+{ -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_BOOL: -+ return D3D_SVT_BOOL; -+ case HLSL_TYPE_DOUBLE: -+ return D3D_SVT_DOUBLE; -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ return D3D_SVT_FLOAT; -+ case HLSL_TYPE_INT: -+ return D3D_SVT_INT; -+ case HLSL_TYPE_PIXELSHADER: -+ return D3D_SVT_PIXELSHADER; -+ case HLSL_TYPE_SAMPLER: -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return D3D_SVT_SAMPLER1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return D3D_SVT_SAMPLER2D; -+ case HLSL_SAMPLER_DIM_3D: -+ return D3D_SVT_SAMPLER3D; -+ case HLSL_SAMPLER_DIM_CUBE: -+ return D3D_SVT_SAMPLERCUBE; -+ case HLSL_SAMPLER_DIM_GENERIC: -+ return D3D_SVT_SAMPLER; -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ case HLSL_TYPE_STRING: -+ return D3D_SVT_STRING; -+ case HLSL_TYPE_TEXTURE: -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return D3D_SVT_TEXTURE1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return D3D_SVT_TEXTURE2D; -+ case HLSL_SAMPLER_DIM_2DMS: -+ return D3D_SVT_TEXTURE2DMS; -+ case HLSL_SAMPLER_DIM_3D: -+ return D3D_SVT_TEXTURE3D; -+ case HLSL_SAMPLER_DIM_CUBE: -+ return D3D_SVT_TEXTURECUBE; -+ case HLSL_SAMPLER_DIM_GENERIC: -+ return 
D3D_SVT_TEXTURE; -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ case HLSL_TYPE_UINT: -+ return D3D_SVT_UINT; -+ case HLSL_TYPE_VERTEXSHADER: -+ return D3D_SVT_VERTEXSHADER; -+ case HLSL_TYPE_VOID: -+ return D3D_SVT_VOID; -+ case HLSL_TYPE_UAV: -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return D3D_SVT_RWTEXTURE1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return D3D_SVT_RWTEXTURE2D; -+ case HLSL_SAMPLER_DIM_3D: -+ return D3D_SVT_RWTEXTURE3D; -+ case HLSL_SAMPLER_DIM_1DARRAY: -+ return D3D_SVT_RWTEXTURE1DARRAY; -+ case HLSL_SAMPLER_DIM_2DARRAY: -+ return D3D_SVT_RWTEXTURE2DARRAY; -+ default: -+ vkd3d_unreachable(); -+ } -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) -+{ -+ const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); -+ const char *name = array_type->name ? array_type->name : ""; -+ const struct hlsl_profile_info *profile = ctx->profile; -+ unsigned int field_count = 0, array_size = 0; -+ size_t fields_offset = 0, name_offset = 0; -+ size_t i; -+ -+ if (type->bytecode_offset) -+ return; -+ -+ if (profile->major_version >= 5) -+ name_offset = put_string(buffer, name); -+ -+ if (type->class == HLSL_CLASS_ARRAY) -+ array_size = hlsl_get_multiarray_size(type); -+ -+ if (array_type->class == HLSL_CLASS_STRUCT) -+ { -+ field_count = array_type->e.record.field_count; -+ -+ for (i = 0; i < field_count; ++i) -+ { -+ struct hlsl_struct_field *field = &array_type->e.record.fields[i]; -+ -+ field->name_bytecode_offset = put_string(buffer, field->name); -+ write_sm4_type(ctx, buffer, field->type); -+ } -+ -+ fields_offset = bytecode_align(buffer); -+ -+ for (i = 0; i < field_count; ++i) -+ { -+ struct hlsl_struct_field *field = &array_type->e.record.fields[i]; -+ -+ put_u32(buffer, field->name_bytecode_offset); -+ put_u32(buffer, field->type->bytecode_offset); -+ put_u32(buffer, 
field->reg_offset[HLSL_REGSET_NUMERIC]); -+ } -+ } -+ -+ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); -+ put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); -+ put_u32(buffer, vkd3d_make_u32(array_size, field_count)); -+ put_u32(buffer, fields_offset); -+ -+ if (profile->major_version >= 5) -+ { -+ put_u32(buffer, 0); /* FIXME: unknown */ -+ put_u32(buffer, 0); /* FIXME: unknown */ -+ put_u32(buffer, 0); /* FIXME: unknown */ -+ put_u32(buffer, 0); /* FIXME: unknown */ -+ put_u32(buffer, name_offset); -+ } -+} -+ -+static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) -+{ -+ if (type->class == HLSL_CLASS_ARRAY) -+ return sm4_resource_type(type->e.array.type); -+ -+ switch (type->base_type) -+ { -+ case HLSL_TYPE_SAMPLER: -+ return D3D_SIT_SAMPLER; -+ case HLSL_TYPE_TEXTURE: -+ return D3D_SIT_TEXTURE; -+ case HLSL_TYPE_UAV: -+ return D3D_SIT_UAV_RWTYPED; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) -+{ -+ if (type->class == HLSL_CLASS_ARRAY) -+ return sm4_resource_format(type->e.array.type); -+ -+ switch (type->e.resource_format->base_type) -+ { -+ case HLSL_TYPE_DOUBLE: -+ return D3D_RETURN_TYPE_DOUBLE; -+ -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ return D3D_RETURN_TYPE_FLOAT; -+ -+ case HLSL_TYPE_INT: -+ return D3D_RETURN_TYPE_SINT; -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_UINT: -+ return D3D_RETURN_TYPE_UINT; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) -+{ -+ if (type->class == HLSL_CLASS_ARRAY) -+ return sm4_rdef_resource_dimension(type->e.array.type); -+ -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return D3D_SRV_DIMENSION_TEXTURE1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return D3D_SRV_DIMENSION_TEXTURE2D; -+ case HLSL_SAMPLER_DIM_3D: -+ return 
D3D_SRV_DIMENSION_TEXTURE3D; -+ case HLSL_SAMPLER_DIM_CUBE: -+ return D3D_SRV_DIMENSION_TEXTURECUBE; -+ case HLSL_SAMPLER_DIM_1DARRAY: -+ return D3D_SRV_DIMENSION_TEXTURE1DARRAY; -+ case HLSL_SAMPLER_DIM_2DARRAY: -+ return D3D_SRV_DIMENSION_TEXTURE2DARRAY; -+ case HLSL_SAMPLER_DIM_2DMS: -+ return D3D_SRV_DIMENSION_TEXTURE2DMS; -+ case HLSL_SAMPLER_DIM_2DMSARRAY: -+ return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; -+ case HLSL_SAMPLER_DIM_CUBEARRAY: -+ return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; -+ case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: -+ return D3D_SRV_DIMENSION_BUFFER; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+struct extern_resource -+{ -+ /* var is only not NULL if this resource is a whole variable, so it may be responsible for more -+ * than one component. */ -+ const struct hlsl_ir_var *var; -+ -+ char *name; -+ struct hlsl_type *data_type; -+ bool is_user_packed; -+ -+ enum hlsl_regset regset; -+ unsigned int id, bind_count; -+}; -+ -+static int sm4_compare_extern_resources(const void *a, const void *b) -+{ -+ const struct extern_resource *aa = (const struct extern_resource *)a; -+ const struct extern_resource *bb = (const struct extern_resource *)b; -+ int r; -+ -+ if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) -+ return r; -+ -+ return vkd3d_u32_compare(aa->id, bb->id); -+} -+ -+static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < count; ++i) -+ vkd3d_free(extern_resources[i].name); -+ vkd3d_free(extern_resources); -+} -+ -+static const char *string_skip_tag(const char *string) -+{ -+ if (!strncmp(string, "", strlen(""))) -+ return string + strlen(""); -+ return string; -+} -+ -+static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) -+{ -+ bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; -+ struct extern_resource *extern_resources = 
NULL; -+ const struct hlsl_ir_var *var; -+ enum hlsl_regset regset; -+ size_t capacity = 0; -+ char *name; -+ -+ *count = 0; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (separate_components) -+ { -+ unsigned int component_count = hlsl_type_component_count(var->data_type); -+ unsigned int k, regset_offset; -+ -+ for (k = 0; k < component_count; ++k) -+ { -+ struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); -+ struct vkd3d_string_buffer *name_buffer; -+ -+ if (!hlsl_type_is_resource(component_type)) -+ continue; -+ -+ regset = hlsl_type_get_regset(component_type); -+ regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, regset, k); -+ -+ if (regset_offset > var->regs[regset].allocation_size) -+ continue; -+ -+ if (var->objects_usage[regset][regset_offset].used) -+ { -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -+ sizeof(*extern_resources)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ hlsl_release_string_buffer(ctx, name_buffer); -+ return NULL; -+ } -+ hlsl_release_string_buffer(ctx, name_buffer); -+ -+ extern_resources[*count].var = NULL; -+ -+ extern_resources[*count].name = name; -+ extern_resources[*count].data_type = component_type; -+ extern_resources[*count].is_user_packed = false; -+ -+ extern_resources[*count].regset = regset; -+ extern_resources[*count].id = var->regs[regset].id + regset_offset; -+ extern_resources[*count].bind_count = 1; -+ -+ ++*count; -+ } -+ } -+ } -+ else -+ { -+ if (!hlsl_type_is_resource(var->data_type)) -+ continue; -+ regset = 
hlsl_type_get_regset(var->data_type); -+ if (!var->regs[regset].allocated) -+ continue; -+ -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -+ sizeof(*extern_resources)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ extern_resources[*count].var = var; -+ -+ extern_resources[*count].name = name; -+ extern_resources[*count].data_type = var->data_type; -+ extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; -+ -+ extern_resources[*count].regset = regset; -+ extern_resources[*count].id = var->regs[regset].id; -+ extern_resources[*count].bind_count = var->bind_count[regset]; -+ -+ ++*count; -+ } -+ } -+ -+ qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); -+ return extern_resources; -+} -+ -+static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) -+{ -+ unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; -+ size_t cbuffers_offset, resources_offset, creator_offset, string_offset; -+ size_t cbuffer_position, resource_position, creator_position; -+ const struct hlsl_profile_info *profile = ctx->profile; -+ struct vkd3d_bytecode_buffer buffer = {0}; -+ struct extern_resource *extern_resources; -+ const struct hlsl_buffer *cbuffer; -+ const struct hlsl_ir_var *var; -+ -+ static const uint16_t target_types[] = -+ { -+ 0xffff, /* PIXEL */ -+ 0xfffe, /* VERTEX */ -+ 0x4753, /* GEOMETRY */ -+ 0x4853, /* HULL */ -+ 0x4453, /* DOMAIN */ -+ 0x4353, /* COMPUTE */ -+ }; -+ -+ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -+ -+ resource_count += extern_resources_count; -+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ if (cbuffer->reg.allocated) -+ { -+ 
++cbuffer_count; -+ ++resource_count; -+ } -+ } -+ -+ put_u32(&buffer, cbuffer_count); -+ cbuffer_position = put_u32(&buffer, 0); -+ put_u32(&buffer, resource_count); -+ resource_position = put_u32(&buffer, 0); -+ put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), -+ target_types[profile->type])); -+ put_u32(&buffer, 0); /* FIXME: compilation flags */ -+ creator_position = put_u32(&buffer, 0); -+ -+ if (profile->major_version >= 5) -+ { -+ put_u32(&buffer, TAG_RD11); -+ put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ -+ put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ -+ put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ -+ put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ -+ put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ -+ put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ -+ put_u32(&buffer, 0); /* unknown; possibly a null terminator */ -+ } -+ -+ /* Bound resources. 
*/ -+ -+ resources_offset = bytecode_align(&buffer); -+ set_u32(&buffer, resource_position, resources_offset); -+ -+ for (i = 0; i < extern_resources_count; ++i) -+ { -+ const struct extern_resource *resource = &extern_resources[i]; -+ uint32_t flags = 0; -+ -+ if (resource->is_user_packed) -+ flags |= D3D_SIF_USERPACKED; -+ -+ put_u32(&buffer, 0); /* name */ -+ put_u32(&buffer, sm4_resource_type(resource->data_type)); -+ if (resource->regset == HLSL_REGSET_SAMPLERS) -+ { -+ put_u32(&buffer, 0); -+ put_u32(&buffer, 0); -+ put_u32(&buffer, 0); -+ } -+ else -+ { -+ unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource_format->dimx; -+ -+ put_u32(&buffer, sm4_resource_format(resource->data_type)); -+ put_u32(&buffer, sm4_rdef_resource_dimension(resource->data_type)); -+ put_u32(&buffer, ~0u); /* FIXME: multisample count */ -+ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; -+ } -+ put_u32(&buffer, resource->id); -+ put_u32(&buffer, resource->bind_count); -+ put_u32(&buffer, flags); -+ } -+ -+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ uint32_t flags = 0; -+ -+ if (!cbuffer->reg.allocated) -+ continue; -+ -+ if (cbuffer->reservation.reg_type) -+ flags |= D3D_SIF_USERPACKED; -+ -+ put_u32(&buffer, 0); /* name */ -+ put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); -+ put_u32(&buffer, 0); /* return type */ -+ put_u32(&buffer, 0); /* dimension */ -+ put_u32(&buffer, 0); /* multisample count */ -+ put_u32(&buffer, cbuffer->reg.id); /* bind point */ -+ put_u32(&buffer, 1); /* bind count */ -+ put_u32(&buffer, flags); /* flags */ -+ } -+ -+ for (i = 0; i < extern_resources_count; ++i) -+ { -+ const struct extern_resource *resource = &extern_resources[i]; -+ -+ string_offset = put_string(&buffer, resource->name); -+ set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); -+ } -+ -+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ if (!cbuffer->reg.allocated) -+ continue; -+ -+ string_offset = put_string(&buffer, cbuffer->name); -+ set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); -+ } -+ -+ /* Buffers. */ -+ -+ cbuffers_offset = bytecode_align(&buffer); -+ set_u32(&buffer, cbuffer_position, cbuffers_offset); -+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ unsigned int var_count = 0; -+ -+ if (!cbuffer->reg.allocated) -+ continue; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_uniform && var->buffer == cbuffer) -+ ++var_count; -+ } -+ -+ put_u32(&buffer, 0); /* name */ -+ put_u32(&buffer, var_count); -+ put_u32(&buffer, 0); /* variable offset */ -+ put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); -+ put_u32(&buffer, 0); /* FIXME: flags */ -+ put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_CT_CBUFFER : D3D_CT_TBUFFER); -+ } -+ -+ i = 0; -+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ if (!cbuffer->reg.allocated) -+ continue; -+ -+ string_offset = put_string(&buffer, cbuffer->name); -+ set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); -+ } -+ -+ i = 0; -+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ size_t vars_start = bytecode_align(&buffer); -+ -+ if (!cbuffer->reg.allocated) -+ continue; -+ -+ set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_uniform && var->buffer == cbuffer) -+ { -+ uint32_t flags = 0; -+ -+ if (var->last_read) -+ flags |= D3D_SVF_USED; -+ -+ put_u32(&buffer, 0); /* name */ -+ put_u32(&buffer, var->buffer_offset * sizeof(float)); -+ put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); -+ put_u32(&buffer, flags); -+ put_u32(&buffer, 0); /* type */ -+ put_u32(&buffer, 0); /* FIXME: default value */ -+ -+ if (profile->major_version >= 5) -+ { -+ put_u32(&buffer, 0); /* texture start */ -+ put_u32(&buffer, 0); /* texture count */ -+ put_u32(&buffer, 0); /* sampler start */ -+ put_u32(&buffer, 0); /* sampler count */ -+ } -+ } -+ } -+ -+ j = 0; -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_uniform && var->buffer == cbuffer) -+ { -+ const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); -+ size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); -+ size_t string_offset = put_string(&buffer, var->name); -+ -+ set_u32(&buffer, var_offset, string_offset); -+ write_sm4_type(ctx, &buffer, var->data_type); -+ set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); -+ ++j; -+ } -+ } -+ } -+ -+ creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); -+ set_u32(&buffer, creator_position, creator_offset); -+ -+ add_section(ctx, dxbc, TAG_RDEF, &buffer); -+ -+ sm4_free_extern_resources(extern_resources, extern_resources_count); -+} -+ -+static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) -+{ -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return VKD3D_SM4_RESOURCE_TEXTURE_1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return VKD3D_SM4_RESOURCE_TEXTURE_2D; -+ case HLSL_SAMPLER_DIM_3D: -+ return VKD3D_SM4_RESOURCE_TEXTURE_3D; -+ case HLSL_SAMPLER_DIM_CUBE: -+ return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; -+ case HLSL_SAMPLER_DIM_1DARRAY: -+ return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; -+ case HLSL_SAMPLER_DIM_2DARRAY: -+ return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; -+ case HLSL_SAMPLER_DIM_2DMS: -+ return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; -+ case HLSL_SAMPLER_DIM_2DMSARRAY: -+ return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; -+ case HLSL_SAMPLER_DIM_CUBEARRAY: -+ return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; -+ case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: -+ return VKD3D_SM4_RESOURCE_BUFFER; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+struct sm4_instruction_modifier -+{ -+ enum vkd3d_sm4_instruction_modifier type; -+ -+ union -+ { -+ struct -+ { -+ int u, v, w; -+ } aoffimmi; -+ } u; -+}; -+ -+static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod) -+{ -+ uint32_t word = 0; -+ -+ word |= VKD3D_SM4_MODIFIER_MASK & imod->type; -+ -+ switch (imod->type) -+ { -+ case VKD3D_SM4_MODIFIER_AOFFIMMI: -+ 
assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); -+ assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); -+ assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); -+ word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; -+ word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; -+ word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ return word; -+} -+ -+struct sm4_register -+{ -+ enum vkd3d_shader_register_type type; -+ struct vkd3d_shader_register_index idx[2]; -+ unsigned int idx_count; -+ enum vkd3d_sm4_dimension dim; -+ uint32_t immconst_uint[4]; -+ unsigned int mod; -+}; -+ -+struct sm4_instruction -+{ -+ enum vkd3d_sm4_opcode opcode; -+ -+ struct sm4_instruction_modifier modifiers[1]; -+ unsigned int modifier_count; -+ -+ struct sm4_dst_register -+ { -+ struct sm4_register reg; -+ unsigned int writemask; -+ } dsts[2]; -+ unsigned int dst_count; -+ -+ struct sm4_src_register -+ { -+ struct sm4_register reg; -+ enum vkd3d_sm4_swizzle_type swizzle_type; -+ unsigned int swizzle; -+ } srcs[5]; -+ unsigned int src_count; -+ -+ unsigned int byte_stride; -+ -+ uint32_t idx[3]; -+ unsigned int idx_count; -+}; -+ -+static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, -+ unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, -+ const struct hlsl_deref *deref) -+{ -+ const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); -+ const struct hlsl_ir_var *var = deref->var; -+ -+ if (var->is_uniform) -+ { -+ enum hlsl_regset regset = hlsl_type_get_regset(data_type); -+ -+ if (regset == HLSL_REGSET_TEXTURES) -+ { -+ reg->type = VKD3DSPR_RESOURCE; -+ reg->dim = VKD3D_SM4_DIMENSION_VEC4; -+ if (swizzle_type) -+ *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); 
-+ assert(regset == HLSL_REGSET_TEXTURES); -+ reg->idx_count = 1; -+ *writemask = VKD3DSP_WRITEMASK_ALL; -+ } -+ else if (regset == HLSL_REGSET_UAVS) -+ { -+ reg->type = VKD3DSPR_UAV; -+ reg->dim = VKD3D_SM4_DIMENSION_VEC4; -+ if (swizzle_type) -+ *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ assert(regset == HLSL_REGSET_UAVS); -+ reg->idx_count = 1; -+ *writemask = VKD3DSP_WRITEMASK_ALL; -+ } -+ else if (regset == HLSL_REGSET_SAMPLERS) -+ { -+ reg->type = VKD3DSPR_SAMPLER; -+ reg->dim = VKD3D_SM4_DIMENSION_NONE; -+ if (swizzle_type) -+ *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ assert(regset == HLSL_REGSET_SAMPLERS); -+ reg->idx_count = 1; -+ *writemask = VKD3DSP_WRITEMASK_ALL; -+ } -+ else -+ { -+ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; -+ -+ assert(data_type->class <= HLSL_CLASS_VECTOR); -+ reg->type = VKD3DSPR_CONSTBUFFER; -+ reg->dim = VKD3D_SM4_DIMENSION_VEC4; -+ if (swizzle_type) -+ *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -+ reg->idx[0].offset = var->buffer->reg.id; -+ reg->idx[1].offset = offset / 4; -+ reg->idx_count = 2; -+ *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); -+ } -+ } -+ else if (var->is_input_semantic) -+ { -+ bool has_idx; -+ -+ if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, ®->type, swizzle_type, &has_idx)) -+ { -+ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -+ -+ if (has_idx) -+ { -+ reg->idx[0].offset = var->semantic.index + offset / 4; -+ reg->idx_count = 1; -+ } -+ -+ reg->dim = VKD3D_SM4_DIMENSION_VEC4; -+ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); -+ } -+ else -+ { -+ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); -+ -+ assert(hlsl_reg.allocated); -+ reg->type = 
VKD3DSPR_INPUT; -+ reg->dim = VKD3D_SM4_DIMENSION_VEC4; -+ if (swizzle_type) -+ *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -+ reg->idx[0].offset = hlsl_reg.id; -+ reg->idx_count = 1; -+ *writemask = hlsl_reg.writemask; -+ } -+ } -+ else if (var->is_output_semantic) -+ { -+ bool has_idx; -+ -+ if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, ®->type, swizzle_type, &has_idx)) -+ { -+ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -+ -+ if (has_idx) -+ { -+ reg->idx[0].offset = var->semantic.index + offset / 4; -+ reg->idx_count = 1; -+ } -+ -+ if (reg->type == VKD3DSPR_DEPTHOUT) -+ reg->dim = VKD3D_SM4_DIMENSION_SCALAR; -+ else -+ reg->dim = VKD3D_SM4_DIMENSION_VEC4; -+ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); -+ } -+ else -+ { -+ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); -+ -+ assert(hlsl_reg.allocated); -+ reg->type = VKD3DSPR_OUTPUT; -+ reg->dim = VKD3D_SM4_DIMENSION_VEC4; -+ reg->idx[0].offset = hlsl_reg.id; -+ reg->idx_count = 1; -+ *writemask = hlsl_reg.writemask; -+ } -+ } -+ else -+ { -+ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); -+ -+ assert(hlsl_reg.allocated); -+ reg->type = VKD3DSPR_TEMP; -+ reg->dim = VKD3D_SM4_DIMENSION_VEC4; -+ if (swizzle_type) -+ *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -+ reg->idx[0].offset = hlsl_reg.id; -+ reg->idx_count = 1; -+ *writemask = hlsl_reg.writemask; -+ } -+} -+ -+static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, -+ const struct hlsl_deref *deref, unsigned int map_writemask) -+{ -+ unsigned int writemask; -+ -+ sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref); -+ if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) -+ src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); -+} -+ -+static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask, -+ enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) -+{ -+ 
assert(instr->reg.allocated); -+ reg->type = VKD3DSPR_TEMP; -+ reg->dim = VKD3D_SM4_DIMENSION_VEC4; -+ *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; -+ reg->idx[0].offset = instr->reg.id; -+ reg->idx_count = 1; -+ *writemask = instr->reg.writemask; -+} -+ -+static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct hlsl_ir_node *instr) -+{ -+ unsigned int swizzle_type; -+ -+ sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, instr); -+} -+ -+static void sm4_src_from_constant_value(struct sm4_src_register *src, -+ const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) -+{ -+ src->swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -+ src->reg.type = VKD3DSPR_IMMCONST; -+ if (width == 1) -+ { -+ src->reg.dim = VKD3D_SM4_DIMENSION_SCALAR; -+ src->reg.immconst_uint[0] = value->u[0].u; -+ } -+ else -+ { -+ unsigned int i, j = 0; -+ -+ src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; -+ for (i = 0; i < 4; ++i) -+ { -+ if ((map_writemask & (1u << i)) && (j < width)) -+ src->reg.immconst_uint[i] = value->u[j++].u; -+ else -+ src->reg.immconst_uint[i] = 0; -+ } -+ } -+} -+ -+static void sm4_src_from_node(struct sm4_src_register *src, -+ const struct hlsl_ir_node *instr, unsigned int map_writemask) -+{ -+ unsigned int writemask; -+ -+ if (instr->type == HLSL_IR_CONSTANT) -+ { -+ struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); -+ -+ sm4_src_from_constant_value(src, &constant->value, instr->data_type->dimx, map_writemask); -+ return; -+ } -+ -+ sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr); -+ if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) -+ src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); -+} -+ -+static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct sm4_dst_register *dst) -+{ -+ const struct vkd3d_sm4_register_type_info *register_type_info; -+ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; -+ uint32_t sm4_reg_type, reg_dim; -+ 
uint32_t token = 0; -+ unsigned int j; -+ -+ register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, dst->reg.type); -+ if (!register_type_info) -+ { -+ FIXME("Unhandled vkd3d-shader register type %#x.\n", dst->reg.type); -+ sm4_reg_type = VKD3D_SM4_RT_TEMP; -+ } -+ else -+ { -+ sm4_reg_type = register_type_info->sm4_type; -+ } -+ -+ reg_dim = dst->reg.dim; -+ -+ token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; -+ token |= dst->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; -+ token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; -+ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) -+ token |= dst->writemask << VKD3D_SM4_WRITEMASK_SHIFT; -+ put_u32(buffer, token); -+ -+ for (j = 0; j < dst->reg.idx_count; ++j) -+ { -+ put_u32(buffer, dst->reg.idx[j].offset); -+ assert(!dst->reg.idx[j].rel_addr); -+ } -+} -+ -+static void sm4_write_src_register(const struct tpf_writer *tpf, const struct sm4_src_register *src) -+{ -+ const struct vkd3d_sm4_register_type_info *register_type_info; -+ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; -+ uint32_t sm4_reg_type, reg_dim; -+ uint32_t token = 0; -+ unsigned int j; -+ -+ register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, src->reg.type); -+ if (!register_type_info) -+ { -+ FIXME("Unhandled vkd3d-shader register type %#x.\n", src->reg.type); -+ sm4_reg_type = VKD3D_SM4_RT_TEMP; -+ } -+ else -+ { -+ sm4_reg_type = register_type_info->sm4_type; -+ } -+ -+ reg_dim = src->reg.dim; -+ -+ token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; -+ token |= src->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; -+ token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; -+ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) -+ { -+ token |= (uint32_t)src->swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; -+ token |= src->swizzle << VKD3D_SM4_SWIZZLE_SHIFT; -+ } -+ if (src->reg.mod) -+ token |= VKD3D_SM4_EXTENDED_OPERAND; -+ put_u32(buffer, token); -+ -+ if (src->reg.mod) -+ put_u32(buffer, (src->reg.mod << 
VKD3D_SM4_REGISTER_MODIFIER_SHIFT) -+ | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); -+ -+ for (j = 0; j < src->reg.idx_count; ++j) -+ { -+ put_u32(buffer, src->reg.idx[j].offset); -+ assert(!src->reg.idx[j].rel_addr); -+ } -+ -+ if (src->reg.type == VKD3DSPR_IMMCONST) -+ { -+ put_u32(buffer, src->reg.immconst_uint[0]); -+ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) -+ { -+ put_u32(buffer, src->reg.immconst_uint[1]); -+ put_u32(buffer, src->reg.immconst_uint[2]); -+ put_u32(buffer, src->reg.immconst_uint[3]); -+ } -+ } -+} -+ -+static uint32_t sm4_register_order(const struct sm4_register *reg) -+{ -+ uint32_t order = 1; -+ if (reg->type == VKD3DSPR_IMMCONST) -+ order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 4 : 1; -+ order += reg->idx_count; -+ if (reg->mod) -+ ++order; -+ return order; -+} -+ -+static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) -+{ -+ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; -+ uint32_t token = instr->opcode; -+ unsigned int size = 1, i, j; -+ -+ size += instr->modifier_count; -+ for (i = 0; i < instr->dst_count; ++i) -+ size += sm4_register_order(&instr->dsts[i].reg); -+ for (i = 0; i < instr->src_count; ++i) -+ size += sm4_register_order(&instr->srcs[i].reg); -+ size += instr->idx_count; -+ if (instr->byte_stride) -+ ++size; -+ -+ token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); -+ -+ if (instr->modifier_count > 0) -+ token |= VKD3D_SM4_INSTRUCTION_MODIFIER; -+ put_u32(buffer, token); -+ -+ for (i = 0; i < instr->modifier_count; ++i) -+ { -+ token = sm4_encode_instruction_modifier(&instr->modifiers[i]); -+ if (instr->modifier_count > i + 1) -+ token |= VKD3D_SM4_INSTRUCTION_MODIFIER; -+ put_u32(buffer, token); -+ } -+ -+ for (i = 0; i < instr->dst_count; ++i) -+ sm4_write_dst_register(tpf, &instr->dsts[i]); -+ -+ for (i = 0; i < instr->src_count; ++i) -+ sm4_write_src_register(tpf, &instr->srcs[i]); -+ -+ if (instr->byte_stride) -+ put_u32(buffer, instr->byte_stride); -+ -+ for (j = 0; j 
< instr->idx_count; ++j) -+ put_u32(buffer, instr->idx[j]); -+} -+ -+static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, -+ const struct hlsl_ir_node *texel_offset) -+{ -+ struct sm4_instruction_modifier modif; -+ struct hlsl_ir_constant *offset; -+ -+ if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) -+ return false; -+ offset = hlsl_ir_constant(texel_offset); -+ -+ modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; -+ modif.u.aoffimmi.u = offset->value.u[0].i; -+ modif.u.aoffimmi.v = 0; -+ modif.u.aoffimmi.w = 0; -+ if (offset->node.data_type->dimx > 1) -+ modif.u.aoffimmi.v = offset->value.u[1].i; -+ if (offset->node.data_type->dimx > 2) -+ modif.u.aoffimmi.w = offset->value.u[2].i; -+ if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 -+ || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 -+ || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) -+ return false; -+ -+ instr->modifiers[instr->modifier_count++] = modif; -+ return true; -+} -+ -+static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) -+{ -+ const struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, -+ -+ .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, -+ .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, -+ .srcs[0].reg.idx[0].offset = cbuffer->reg.id, -+ .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, -+ .srcs[0].reg.idx_count = 2, -+ .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, -+ .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), -+ .src_count = 1, -+ }; -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) -+{ -+ struct hlsl_type *component_type; -+ unsigned int i; -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM4_OP_DCL_SAMPLER, -+ -+ .dsts[0].reg.type = VKD3DSPR_SAMPLER, -+ .dsts[0].reg.idx_count = 1, -+ .dst_count = 1, -+ }; -+ -+ component_type = 
hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); -+ -+ if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) -+ instr.opcode |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; -+ -+ assert(resource->regset == HLSL_REGSET_SAMPLERS); -+ -+ for (i = 0; i < resource->bind_count; ++i) -+ { -+ if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) -+ continue; -+ -+ instr.dsts[0].reg.idx[0].offset = resource->id + i; -+ write_sm4_instruction(tpf, &instr); -+ } -+} -+ -+static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, -+ bool uav) -+{ -+ enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; -+ struct hlsl_type *component_type; -+ struct sm4_instruction instr; -+ unsigned int i; -+ -+ assert(resource->regset == regset); -+ -+ component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); -+ -+ for (i = 0; i < resource->bind_count; ++i) -+ { -+ if (resource->var && !resource->var->objects_usage[regset][i].used) -+ continue; -+ -+ instr = (struct sm4_instruction) -+ { -+ .dsts[0].reg.type = uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, -+ .dsts[0].reg.idx[0].offset = resource->id + i, -+ .dsts[0].reg.idx_count = 1, -+ .dst_count = 1, -+ -+ .idx[0] = sm4_resource_format(component_type) * 0x1111, -+ .idx_count = 1, -+ }; -+ -+ if (uav) -+ { -+ switch (resource->data_type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: -+ instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; -+ instr.byte_stride = resource->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; -+ break; -+ default: -+ instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; -+ break; -+ } -+ } -+ else -+ { -+ instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; -+ } -+ instr.opcode |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); -+ -+ if (component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -+ || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) -+ { -+ instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; -+ } -+ -+ write_sm4_instruction(tpf, &instr); -+ } -+} -+ -+static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) -+{ -+ const struct hlsl_profile_info *profile = tpf->ctx->profile; -+ const bool output = var->is_output_semantic; -+ D3D_NAME usage; -+ bool has_idx; -+ -+ struct sm4_instruction instr = -+ { -+ .dsts[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, -+ .dst_count = 1, -+ }; -+ -+ if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) -+ { -+ if (has_idx) -+ { -+ instr.dsts[0].reg.idx[0].offset = var->semantic.index; -+ instr.dsts[0].reg.idx_count = 1; -+ } -+ else -+ { -+ instr.dsts[0].reg.idx_count = 0; -+ } -+ instr.dsts[0].writemask = (1 << var->data_type->dimx) - 1; -+ } -+ else -+ { -+ instr.dsts[0].reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; -+ instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; -+ instr.dsts[0].reg.idx_count = 1; -+ instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; -+ } -+ -+ if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) -+ instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; -+ -+ hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); -+ if (usage == ~0u) -+ usage = D3D_NAME_UNDEFINED; -+ -+ if (var->is_input_semantic) -+ { -+ switch (usage) -+ { -+ case D3D_NAME_UNDEFINED: -+ instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; -+ break; -+ -+ case D3D_NAME_INSTANCE_ID: -+ case D3D_NAME_PRIMITIVE_ID: -+ case D3D_NAME_VERTEX_ID: -+ instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; -+ break; -+ -+ default: -+ instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; -+ break; -+ } -+ -+ if (profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ { -+ enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; -+ -+ if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) -+ mode = VKD3DSIM_CONSTANT; -+ -+ instr.opcode |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; -+ } -+ } -+ else -+ { -+ if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; -+ else -+ instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; -+ } -+ -+ switch (usage) -+ { -+ case D3D_NAME_COVERAGE: -+ case D3D_NAME_DEPTH: -+ case D3D_NAME_DEPTH_GREATER_EQUAL: -+ case D3D_NAME_DEPTH_LESS_EQUAL: -+ case D3D_NAME_TARGET: -+ case D3D_NAME_UNDEFINED: -+ break; -+ -+ default: -+ instr.idx_count = 1; -+ instr.idx[0] = usage; -+ break; -+ } -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t 
temp_count) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM4_OP_DCL_TEMPS, -+ -+ .idx = {temp_count}, -+ .idx_count = 1, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, -+ -+ .idx[0] = thread_count[0], -+ .idx[1] = thread_count[1], -+ .idx[2] = thread_count[2], -+ .idx_count = 3, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_ret(const struct tpf_writer *tpf) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM4_OP_RET, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+ const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) -+{ -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = opcode; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[0], src, instr.dsts[0].writemask); -+ instr.srcs[0].reg.mod = src_mod; -+ instr.src_count = 1; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+ const struct hlsl_ir_node *dst, unsigned dst_idx, const struct hlsl_ir_node *src) -+{ -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = opcode; -+ -+ assert(dst_idx < ARRAY_SIZE(instr.dsts)); -+ sm4_dst_from_node(&instr.dsts[dst_idx], dst); -+ assert(1 - dst_idx >= 0); -+ instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; -+ instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; -+ instr.dsts[1 - dst_idx].reg.idx_count = 0; -+ instr.dst_count = 2; -+ -+ sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); -+ instr.src_count = 1; -+ -+ 
write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+ const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -+{ -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = opcode; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); -+ sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); -+ instr.src_count = 2; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+/* dp# instructions don't map the swizzle. */ -+static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+ const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -+{ -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = opcode; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); -+ instr.src_count = 2; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, -+ enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, -+ const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -+{ -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = opcode; -+ -+ assert(dst_idx < ARRAY_SIZE(instr.dsts)); -+ sm4_dst_from_node(&instr.dsts[dst_idx], dst); -+ assert(1 - dst_idx >= 0); -+ instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; -+ instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; -+ instr.dsts[1 - dst_idx].reg.idx_count = 0; -+ instr.dst_count = 2; -+ -+ sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[dst_idx].writemask); -+ 
sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); -+ instr.src_count = 2; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+ const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, -+ const struct hlsl_ir_node *src3) -+{ -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = opcode; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); -+ sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); -+ sm4_src_from_node(&instr.srcs[2], src3, instr.dsts[0].writemask); -+ instr.src_count = 3; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, -+ const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, -+ const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, -+ enum hlsl_sampler_dim dim) -+{ -+ const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); -+ bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE -+ && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); -+ bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); -+ unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ if (uav) -+ instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; -+ else -+ instr.opcode = multisampled ? 
VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; -+ -+ if (texel_offset) -+ { -+ if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) -+ { -+ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ "Offset must resolve to integer literal in the range -8 to 7."); -+ return; -+ } -+ } -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ if (!uav) -+ { -+ /* Mipmap level is in the last component in the IR, but needs to be in the W -+ * component in the instruction. */ -+ unsigned int dim_count = hlsl_sampler_dim_count(dim); -+ -+ if (dim_count == 1) -+ coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; -+ if (dim_count == 2) -+ coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; -+ } -+ -+ sm4_src_from_node(&instr.srcs[0], coords, coords_writemask); -+ -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); -+ -+ instr.src_count = 2; -+ -+ if (multisampled) -+ { -+ if (sample_index->type == HLSL_IR_CONSTANT) -+ { -+ struct sm4_register *reg = &instr.srcs[2].reg; -+ struct hlsl_ir_constant *index; -+ -+ index = hlsl_ir_constant(sample_index); -+ -+ memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); -+ instr.srcs[2].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -+ reg->type = VKD3DSPR_IMMCONST; -+ reg->dim = VKD3D_SM4_DIMENSION_SCALAR; -+ reg->immconst_uint[0] = index->value.u[0].u; -+ } -+ else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) -+ { -+ hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); -+ } -+ else -+ { -+ sm4_src_from_node(&instr.srcs[2], sample_index, 0); -+ } -+ -+ ++instr.src_count; -+ } -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_ir_node *texel_offset = load->texel_offset.node; -+ const struct hlsl_ir_node 
*coords = load->coords.node; -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_deref *sampler = &load->sampler; -+ const struct hlsl_ir_node *dst = &load->node; -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ switch (load->load_type) -+ { -+ case HLSL_RESOURCE_SAMPLE: -+ instr.opcode = VKD3D_SM4_OP_SAMPLE; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_CMP: -+ instr.opcode = VKD3D_SM4_OP_SAMPLE_C; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_CMP_LZ: -+ instr.opcode = VKD3D_SM4_OP_SAMPLE_C_LZ; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_LOD: -+ instr.opcode = VKD3D_SM4_OP_SAMPLE_LOD; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -+ instr.opcode = VKD3D_SM4_OP_SAMPLE_B; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_GRAD: -+ instr.opcode = VKD3D_SM4_OP_SAMPLE_GRAD; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (texel_offset) -+ { -+ if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) -+ { -+ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ "Offset must resolve to integer literal in the range -8 to 7."); -+ return; -+ } -+ } -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); -+ instr.src_count = 3; -+ -+ if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD -+ || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) -+ { -+ sm4_src_from_node(&instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); -+ ++instr.src_count; -+ } -+ else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) -+ { -+ sm4_src_from_node(&instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_node(&instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); -+ instr.src_count += 2; -+ } -+ else if (load->load_type == 
HLSL_RESOURCE_SAMPLE_CMP -+ || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ) -+ { -+ sm4_src_from_node(&instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); -+ ++instr.src_count; -+ } -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_ir_node *dst = &load->node; -+ struct sm4_instruction instr; -+ -+ assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -+ if (dst->data_type->base_type == HLSL_TYPE_UINT) -+ instr.opcode |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], resource, instr.dsts[0].writemask); -+ instr.src_count = 1; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_ir_node *dst = &load->node; -+ struct sm4_instruction instr; -+ -+ assert(dst->data_type->base_type == HLSL_TYPE_UINT || dst->data_type->base_type == HLSL_TYPE_FLOAT); -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_RESINFO; -+ if (dst->data_type->base_type == HLSL_TYPE_UINT) -+ instr.opcode |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); -+ instr.src_count = 2; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static bool type_is_float(const struct hlsl_type *type) -+{ -+ return type->base_type 
== HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; -+} -+ -+static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, -+ const struct hlsl_ir_node *arg, uint32_t mask) -+{ -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_AND; -+ -+ sm4_dst_from_node(&instr.dsts[0], &expr->node); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); -+ instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; -+ instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; -+ instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; -+ instr.srcs[1].reg.immconst_uint[0] = mask; -+ instr.src_count = 2; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) -+{ -+ static const union -+ { -+ uint32_t u; -+ float f; -+ } one = { .f = 1.0 }; -+ const struct hlsl_ir_node *arg1 = expr->operands[0].node; -+ const struct hlsl_type *dst_type = expr->node.data_type; -+ const struct hlsl_type *src_type = arg1->data_type; -+ -+ /* Narrowing casts were already lowered. 
*/ -+ assert(src_type->dimx == dst_type->dimx); -+ -+ switch (dst_type->base_type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ switch (src_type->base_type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_TYPE_INT: -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_TYPE_UINT: -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ write_sm4_cast_from_bool(tpf, expr, arg1, one.u); -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ -+ case HLSL_TYPE_INT: -+ switch (src_type->base_type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ write_sm4_cast_from_bool(tpf, expr, arg1, 1); -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ -+ case HLSL_TYPE_UINT: -+ switch (src_type->base_type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ write_sm4_cast_from_bool(tpf, expr, arg1, 1); -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to 
double."); -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ /* Casts to bool should have already been lowered. */ -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, -+ const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) -+{ -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; -+ -+ sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); -+ instr.src_count = 2; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) -+{ -+ const struct hlsl_ir_node *arg1 = expr->operands[0].node; -+ const struct hlsl_ir_node *arg2 = expr->operands[1].node; -+ const struct hlsl_ir_node *arg3 = expr->operands[2].node; -+ const struct hlsl_type *dst_type = expr->node.data_type; -+ struct vkd3d_string_buffer *dst_type_string; -+ -+ assert(expr->node.reg.allocated); -+ -+ if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) -+ return; -+ -+ switch (expr->op) -+ { -+ case HLSL_OP1_ABS: -+ switch (dst_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); -+ } -+ break; -+ -+ case HLSL_OP1_BIT_NOT: -+ assert(type_is_integer(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_CAST: -+ write_sm4_cast(tpf, expr); -+ break; -+ -+ case HLSL_OP1_COS: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, 
&expr->node, 1, arg1); -+ break; -+ -+ case HLSL_OP1_DSX: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_DSX_COARSE: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_DSX_FINE: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_DSY: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_DSY_COARSE: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_DSY_FINE: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_EXP2: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_FLOOR: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_FRACT: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_LOG2: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_LOGIC_NOT: -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_NEG: -+ switch (dst_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); -+ break; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); -+ 
break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); -+ } -+ break; -+ -+ case HLSL_OP1_REINTERPRET: -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_ROUND: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_RSQ: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_SAT: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV -+ | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), -+ &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_SIN: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); -+ break; -+ -+ case HLSL_OP1_SQRT: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP1_TRUNC: -+ assert(type_is_float(dst_type)); -+ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); -+ break; -+ -+ case HLSL_OP2_ADD: -+ switch (dst_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); -+ } -+ break; -+ -+ case HLSL_OP2_BIT_AND: -+ assert(type_is_integer(dst_type)); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_BIT_OR: -+ assert(type_is_integer(dst_type)); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_BIT_XOR: -+ assert(type_is_integer(dst_type)); -+ 
write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_DIV: -+ switch (dst_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_UINT: -+ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); -+ } -+ break; -+ -+ case HLSL_OP2_DOT: -+ switch (dst_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ switch (arg1->data_type->dimx) -+ { -+ case 4: -+ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); -+ break; -+ -+ case 3: -+ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); -+ break; -+ -+ case 2: -+ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); -+ break; -+ -+ case 1: -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); -+ } -+ break; -+ -+ case HLSL_OP2_EQUAL: -+ { -+ const struct hlsl_type *src_type = arg1->data_type; -+ -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ -+ switch (src_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", -+ debug_hlsl_type(tpf->ctx, src_type)); -+ break; -+ } -+ break; -+ } -+ -+ case HLSL_OP2_GEQUAL: -+ { -+ const struct hlsl_type *src_type = arg1->data_type; -+ -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ -+ switch (src_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, 
arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_INT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_UINT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", -+ debug_hlsl_type(tpf->ctx, src_type)); -+ break; -+ } -+ break; -+ } -+ -+ case HLSL_OP2_LESS: -+ { -+ const struct hlsl_type *src_type = arg1->data_type; -+ -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ -+ switch (src_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_INT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_UINT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", -+ debug_hlsl_type(tpf->ctx, src_type)); -+ break; -+ } -+ break; -+ } -+ -+ case HLSL_OP2_LOGIC_AND: -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_LOGIC_OR: -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_LSHIFT: -+ assert(type_is_integer(dst_type)); -+ assert(dst_type->base_type != HLSL_TYPE_BOOL); -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_OP2_MAX: -+ switch (dst_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_INT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_UINT: -+ write_sm4_binary_op(tpf, 
VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); -+ } -+ break; -+ -+ case HLSL_OP2_MIN: -+ switch (dst_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_INT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_UINT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); -+ } -+ break; -+ -+ case HLSL_OP2_MOD: -+ switch (dst_type->base_type) -+ { -+ case HLSL_TYPE_UINT: -+ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); -+ } -+ break; -+ -+ case HLSL_OP2_MUL: -+ switch (dst_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ /* Using IMUL instead of UMUL because we're taking the low -+ * bits, and the native compiler generates IMUL. 
*/ -+ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); -+ } -+ break; -+ -+ case HLSL_OP2_NEQUAL: -+ { -+ const struct hlsl_type *src_type = arg1->data_type; -+ -+ assert(dst_type->base_type == HLSL_TYPE_BOOL); -+ -+ switch (src_type->base_type) -+ { -+ case HLSL_TYPE_FLOAT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", -+ debug_hlsl_type(tpf->ctx, src_type)); -+ break; -+ } -+ break; -+ } -+ -+ case HLSL_OP2_RSHIFT: -+ assert(type_is_integer(dst_type)); -+ assert(dst_type->base_type != HLSL_TYPE_BOOL); -+ write_sm4_binary_op(tpf, dst_type->base_type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, -+ &expr->node, arg1, arg2); -+ break; -+ -+ case HLSL_OP3_MOVC: -+ write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); -+ } -+ -+ hlsl_release_string_buffer(tpf->ctx, dst_type_string); -+} -+ -+static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, -+ .src_count = 1, -+ }; -+ -+ assert(iff->condition.node->data_type->dimx == 1); -+ -+ sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); -+ write_sm4_instruction(tpf, &instr); -+ -+ write_sm4_block(tpf, &iff->then_block); -+ -+ if (!list_empty(&iff->else_block.instrs)) -+ { -+ instr.opcode = VKD3D_SM4_OP_ELSE; -+ instr.src_count = 0; -+ write_sm4_instruction(tpf, &instr); -+ -+ write_sm4_block(tpf, &iff->else_block); -+ } -+ -+ instr.opcode = VKD3D_SM4_OP_ENDIF; -+ instr.src_count = 0; -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) -+{ -+ struct sm4_instruction instr = {0}; -+ -+ switch (jump->type) -+ { -+ case HLSL_IR_JUMP_BREAK: -+ instr.opcode = VKD3D_SM4_OP_BREAK; -+ break; -+ -+ case HLSL_IR_JUMP_DISCARD_NZ: -+ { -+ instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; -+ -+ memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); -+ instr.src_count = 1; -+ sm4_src_from_node(&instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); -+ break; -+ } -+ -+ case HLSL_IR_JUMP_RETURN: -+ vkd3d_unreachable(); -+ -+ default: -+ hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); -+ return; -+ } -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+/* Does this variable's data come directly from the API user, rather than being -+ * temporary or from a 
previous shader stage? -+ * I.e. is it a uniform or VS input? */ -+static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) -+{ -+ if (var->is_uniform) -+ return true; -+ -+ return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; -+} -+ -+static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) -+{ -+ const struct hlsl_type *type = load->node.data_type; -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ -+ sm4_dst_from_node(&instr.dsts[0], &load->node); -+ instr.dst_count = 1; -+ -+ assert(type->class <= HLSL_CLASS_LAST_NUMERIC); -+ if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) -+ { -+ struct hlsl_constant_value value; -+ -+ /* Uniform bools can be specified as anything, but internal bools always -+ * have 0 for false and ~0 for true. Normalize that here. */ -+ -+ instr.opcode = VKD3D_SM4_OP_MOVC; -+ -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); -+ -+ memset(&value, 0xff, sizeof(value)); -+ sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); -+ memset(&value, 0, sizeof(value)); -+ sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].writemask); -+ instr.src_count = 3; -+ } -+ else -+ { -+ instr.opcode = VKD3D_SM4_OP_MOV; -+ -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); -+ instr.src_count = 1; -+ } -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM4_OP_LOOP, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+ -+ write_sm4_block(tpf, &loop->body); -+ -+ instr.opcode = VKD3D_SM4_OP_ENDLOOP; -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, -+ 
const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords, -+ unsigned int swizzle, const struct hlsl_ir_node *texel_offset) -+{ -+ struct sm4_src_register *src; -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ -+ instr.opcode = VKD3D_SM4_OP_GATHER4; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); -+ -+ if (texel_offset) -+ { -+ if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) -+ { -+ if (tpf->ctx->profile->major_version < 5) -+ { -+ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); -+ return; -+ } -+ instr.opcode = VKD3D_SM5_OP_GATHER4_PO; -+ sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); -+ } -+ } -+ -+ sm4_src_from_deref(tpf->ctx, &instr.srcs[instr.src_count++], resource, instr.dsts[0].writemask); -+ -+ src = &instr.srcs[instr.src_count++]; -+ sm4_src_from_deref(tpf->ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); -+ src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; -+ src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; -+ src->swizzle = swizzle; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_ir_node *texel_offset = load->texel_offset.node; -+ const struct hlsl_ir_node *sample_index = load->sample_index.node; -+ const struct hlsl_ir_node *coords = load->coords.node; -+ -+ if (load->sampler.var && !load->sampler.var->is_uniform) -+ { -+ hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable."); -+ return; -+ } -+ -+ if (!load->resource.var->is_uniform) -+ { -+ hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable."); -+ return; -+ } -+ -+ switch 
(load->load_type) -+ { -+ case HLSL_RESOURCE_LOAD: -+ write_sm4_ld(tpf, &load->node, &load->resource, -+ coords, sample_index, texel_offset, load->sampling_dim); -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE: -+ case HLSL_RESOURCE_SAMPLE_CMP: -+ case HLSL_RESOURCE_SAMPLE_CMP_LZ: -+ case HLSL_RESOURCE_SAMPLE_LOD: -+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -+ case HLSL_RESOURCE_SAMPLE_GRAD: -+ /* Combined sample expressions were lowered. */ -+ assert(load->sampler.var); -+ write_sm4_sample(tpf, load); -+ break; -+ -+ case HLSL_RESOURCE_GATHER_RED: -+ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, -+ HLSL_SWIZZLE(X, X, X, X), texel_offset); -+ break; -+ -+ case HLSL_RESOURCE_GATHER_GREEN: -+ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, -+ HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); -+ break; -+ -+ case HLSL_RESOURCE_GATHER_BLUE: -+ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, -+ HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); -+ break; -+ -+ case HLSL_RESOURCE_GATHER_ALPHA: -+ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, -+ HLSL_SWIZZLE(W, W, W, W), texel_offset); -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_INFO: -+ write_sm4_sampleinfo(tpf, load); -+ break; -+ -+ case HLSL_RESOURCE_RESINFO: -+ write_sm4_resinfo(tpf, load); -+ break; -+ } -+} -+ -+static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) -+{ -+ struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); -+ -+ if (!store->resource.var->is_uniform) -+ { -+ hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); -+ return; -+ } -+ -+ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -+ { -+ hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); -+ return; -+ } -+ -+ write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, 
store->value.node); -+} -+ -+static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) -+{ -+ const struct hlsl_ir_node *rhs = store->rhs.node; -+ struct sm4_instruction instr; -+ unsigned int writemask; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_MOV; -+ -+ sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); -+ instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); -+ instr.dst_count = 1; -+ -+ sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); -+ instr.src_count = 1; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) -+{ -+ struct sm4_instruction instr; -+ unsigned int writemask; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_MOV; -+ -+ sm4_dst_from_node(&instr.dsts[0], &swizzle->node); -+ instr.dst_count = 1; -+ -+ sm4_register_from_node(&instr.srcs[0].reg, &writemask, &instr.srcs[0].swizzle_type, swizzle->val.node); -+ instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), -+ swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); -+ instr.src_count = 1; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) -+{ -+ const struct hlsl_ir_node *instr; -+ -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ if (instr->data_type) -+ { -+ if (instr->data_type->class == HLSL_CLASS_MATRIX) -+ { -+ hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); -+ break; -+ } -+ else if (instr->data_type->class == HLSL_CLASS_OBJECT) -+ { -+ hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); -+ break; -+ } -+ -+ assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); -+ -+ if 
(!instr->reg.allocated) -+ { -+ assert(instr->type == HLSL_IR_CONSTANT); -+ continue; -+ } -+ } -+ -+ switch (instr->type) -+ { -+ case HLSL_IR_CALL: -+ case HLSL_IR_CONSTANT: -+ vkd3d_unreachable(); -+ -+ case HLSL_IR_EXPR: -+ write_sm4_expr(tpf, hlsl_ir_expr(instr)); -+ break; -+ -+ case HLSL_IR_IF: -+ write_sm4_if(tpf, hlsl_ir_if(instr)); -+ break; -+ -+ case HLSL_IR_JUMP: -+ write_sm4_jump(tpf, hlsl_ir_jump(instr)); -+ break; -+ -+ case HLSL_IR_LOAD: -+ write_sm4_load(tpf, hlsl_ir_load(instr)); -+ break; -+ -+ case HLSL_IR_RESOURCE_LOAD: -+ write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); -+ break; -+ -+ case HLSL_IR_RESOURCE_STORE: -+ write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); -+ break; -+ -+ case HLSL_IR_LOOP: -+ write_sm4_loop(tpf, hlsl_ir_loop(instr)); -+ break; -+ -+ case HLSL_IR_STORE: -+ write_sm4_store(tpf, hlsl_ir_store(instr)); -+ break; -+ -+ case HLSL_IR_SWIZZLE: -+ write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); -+ } -+ } -+} -+ -+static void write_sm4_shdr(struct hlsl_ctx *ctx, -+ const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) -+{ -+ const struct hlsl_profile_info *profile = ctx->profile; -+ struct vkd3d_bytecode_buffer buffer = {0}; -+ struct extern_resource *extern_resources; -+ unsigned int extern_resources_count, i; -+ const struct hlsl_buffer *cbuffer; -+ const struct hlsl_ir_var *var; -+ size_t token_count_position; -+ struct tpf_writer tpf; -+ -+ static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = -+ { -+ VKD3D_SM4_PS, -+ VKD3D_SM4_VS, -+ VKD3D_SM4_GS, -+ VKD3D_SM5_HS, -+ VKD3D_SM5_DS, -+ VKD3D_SM5_CS, -+ 0, /* EFFECT */ -+ 0, /* TEXTURE */ -+ VKD3D_SM4_LIB, -+ }; -+ -+ tpf_writer_init(&tpf, ctx, &buffer); -+ -+ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -+ -+ put_u32(&buffer, vkd3d_make_u32((profile->major_version << 
4) | profile->minor_version, shader_types[profile->type])); -+ token_count_position = put_u32(&buffer, 0); -+ -+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ if (cbuffer->reg.allocated) -+ write_sm4_dcl_constant_buffer(&tpf, cbuffer); -+ } -+ -+ for (i = 0; i < extern_resources_count; ++i) -+ { -+ const struct extern_resource *resource = &extern_resources[i]; -+ -+ if (resource->regset == HLSL_REGSET_SAMPLERS) -+ write_sm4_dcl_samplers(&tpf, resource); -+ else if (resource->regset == HLSL_REGSET_TEXTURES) -+ write_sm4_dcl_textures(&tpf, resource, false); -+ else if (resource->regset == HLSL_REGSET_UAVS) -+ write_sm4_dcl_textures(&tpf, resource, true); -+ } -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) -+ write_sm4_dcl_semantic(&tpf, var); -+ } -+ -+ if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) -+ write_sm4_dcl_thread_group(&tpf, ctx->thread_count); -+ -+ if (ctx->temp_count) -+ write_sm4_dcl_temps(&tpf, ctx->temp_count); -+ -+ write_sm4_block(&tpf, &entry_func->body); -+ -+ write_sm4_ret(&tpf); -+ -+ set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); -+ -+ add_section(ctx, dxbc, TAG_SHDR, &buffer); -+ -+ sm4_free_extern_resources(extern_resources, extern_resources_count); -+} -+ -+int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -+{ -+ struct dxbc_writer dxbc; -+ size_t i; -+ int ret; -+ -+ dxbc_writer_init(&dxbc); -+ -+ write_sm4_signature(ctx, &dxbc, false); -+ write_sm4_signature(ctx, &dxbc, true); -+ write_sm4_rdef(ctx, &dxbc); -+ write_sm4_shdr(ctx, entry_func, &dxbc); -+ -+ if (!(ret = ctx->result)) -+ ret = dxbc_writer_write(&dxbc, out); -+ for (i = 0; i < dxbc.section_count; ++i) -+ vkd3d_shader_free_shader_code(&dxbc.sections[i].data); -+ return ret; -+} -diff --git 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index c9c15f01155..0245d83a10b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -22,6 +22,8 @@ - #include - #include - -+/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ -+ - static inline int char_to_int(char c) - { - if ('0' <= c && c <= '9') -@@ -338,22 +340,35 @@ void vkd3d_shader_error(struct vkd3d_shader_message_context *context, const stru - va_end(args); - } - -+size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer) -+{ -+ size_t aligned_size = align(buffer->size, 4); -+ -+ if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, aligned_size, 1)) -+ { -+ buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; -+ return aligned_size; -+ } -+ -+ memset(buffer->data + buffer->size, 0xab, aligned_size - buffer->size); -+ buffer->size = aligned_size; -+ return aligned_size; -+} -+ - size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size) - { -- size_t aligned_size = align(size, 4); -- size_t offset = buffer->size; -+ size_t offset = bytecode_align(buffer); - - if (buffer->status) - return offset; - -- if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, offset + aligned_size, 1)) -+ if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, offset + size, 1)) - { - buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; - return offset; - } - memcpy(buffer->data + offset, bytes, size); -- memset(buffer->data + offset + size, 0xab, aligned_size - size); -- buffer->size = offset + aligned_size; -+ buffer->size = offset + size; - return offset; - } - -@@ -400,6 +415,8 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t - return "hlsl"; - case VKD3D_SHADER_SOURCE_D3D_BYTECODE: - return "d3dbc"; -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ return "dxil"; - default: - FIXME("Unhandled source type %#x.\n", 
type); - return "bin"; -@@ -425,6 +442,18 @@ void vkd3d_shader_dump_shader(enum vkd3d_shader_source_type source_type, - shader_get_source_type_suffix(source_type), shader->code, shader->size); - } - -+static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) -+{ -+ struct vkd3d_shader_scan_signature_info *signature_info; -+ -+ if ((signature_info = vkd3d_find_struct(info->next, SCAN_SIGNATURE_INFO))) -+ { -+ memset(&signature_info->input, 0, sizeof(signature_info->input)); -+ memset(&signature_info->output, 0, sizeof(signature_info->output)); -+ memset(&signature_info->patch_constant, 0, sizeof(signature_info->patch_constant)); -+ } -+} -+ - bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, - struct vkd3d_shader_message_context *message_context, const char *source_name, - const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, -@@ -511,9 +540,46 @@ void vkd3d_shader_free_messages(char *messages) - vkd3d_free(messages); - } - -+static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, -+ const struct shader_signature *src) -+{ -+ unsigned int i; -+ -+ signature->element_count = src->element_count; -+ if (!src->elements) -+ { -+ assert(!signature->element_count); -+ signature->elements = NULL; -+ return true; -+ } -+ -+ if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) -+ return false; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ struct vkd3d_shader_signature_element *d = &signature->elements[i]; -+ struct signature_element *e = &src->elements[i]; -+ -+ d->semantic_name = e->semantic_name; -+ d->semantic_index = e->semantic_index; -+ d->stream_index = e->stream_index; -+ d->sysval_semantic = e->sysval_semantic; -+ d->component_type = e->component_type; -+ d->register_index = e->register_index; -+ if (e->register_count > 1) -+ FIXME("Arrayed elements are not supported yet.\n"); -+ d->mask = 
e->mask; -+ d->used_mask = e->used_mask; -+ d->min_precision = e->min_precision; -+ } -+ -+ return true; -+} -+ - struct vkd3d_shader_scan_context - { -- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -+ struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; - size_t descriptors_size; - - struct vkd3d_shader_message_context *message_context; -@@ -533,20 +599,12 @@ struct vkd3d_shader_scan_context - size_t cf_info_size; - size_t cf_info_count; - -- struct -- { -- unsigned int id; -- unsigned int descriptor_idx; -- } *uav_ranges; -- size_t uav_ranges_size; -- size_t uav_range_count; -- - enum vkd3d_shader_api_version api_version; - }; - - static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, -+ struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context) - { - unsigned int i; -@@ -569,7 +627,6 @@ static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *con - - static void vkd3d_shader_scan_context_cleanup(struct vkd3d_shader_scan_context *context) - { -- vkd3d_free(context->uav_ranges); - vkd3d_free(context->cf_info); - } - -@@ -637,18 +694,24 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_find_innermost_loop_cf_inf - return NULL; - } - --static struct vkd3d_shader_descriptor_info *vkd3d_shader_scan_get_uav_descriptor_info( -- const struct vkd3d_shader_scan_context *context, unsigned int range_id) -+static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_context *context, -+ const struct vkd3d_shader_register *reg, uint32_t flag) - { -+ unsigned int range_id = reg->idx[0].offset; - unsigned int i; - -- for (i = 0; i < context->uav_range_count; ++i) -+ if (!context->scan_descriptor_info) -+ return; -+ -+ for (i = 0; i < context->scan_descriptor_info->descriptor_count; ++i) - { -- if 
(context->uav_ranges[i].id == range_id) -- return &context->scan_descriptor_info->descriptors[context->uav_ranges[i].descriptor_idx]; -+ if (context->scan_descriptor_info->descriptors[i].type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV -+ && context->scan_descriptor_info->descriptors[i].register_id == range_id) -+ { -+ context->scan_descriptor_info->descriptors[i].flags |= flag; -+ break; -+ } - } -- -- return NULL; - } - - static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) -@@ -664,13 +727,7 @@ static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instr - static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg) - { -- struct vkd3d_shader_descriptor_info *d; -- -- if (!context->scan_descriptor_info) -- return; -- -- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); -- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ; -+ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); - } - - static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) -@@ -683,13 +740,7 @@ static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_in - static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg) - { -- struct vkd3d_shader_descriptor_info *d; -- -- if (!context->scan_descriptor_info) -- return; -- -- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); -- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER; -+ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER); - } - - static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) -@@ -702,93 +753,76 @@ static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct 
vkd3d_shader_ - static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_register *reg) - { -- struct vkd3d_shader_descriptor_info *d; -- -- if (!context->scan_descriptor_info) -- return; -- -- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); -- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS; -+ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS); - } - --static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, -- enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register_range *range, -- enum vkd3d_shader_resource_type resource_type, enum vkd3d_shader_resource_data_type resource_data_type, -- unsigned int flags) -+static struct vkd3d_shader_descriptor_info1 *vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, -+ enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register *reg, -+ const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, -+ enum vkd3d_shader_resource_data_type resource_data_type) - { -- struct vkd3d_shader_scan_descriptor_info *info = context->scan_descriptor_info; -- struct vkd3d_shader_descriptor_info *d; -+ struct vkd3d_shader_scan_descriptor_info1 *info = context->scan_descriptor_info; -+ struct vkd3d_shader_descriptor_info1 *d; - - if (!vkd3d_array_reserve((void **)&info->descriptors, &context->descriptors_size, - info->descriptor_count + 1, sizeof(*info->descriptors))) - { - ERR("Failed to allocate descriptor info.\n"); -- return false; -+ return NULL; - } - - d = &info->descriptors[info->descriptor_count]; -+ memset(d, 0, sizeof(*d)); - d->type = type; -+ d->register_id = reg->idx[0].offset; - d->register_space = range->space; - d->register_index = range->first; - d->resource_type = resource_type; - d->resource_data_type = resource_data_type; -- d->flags = flags; - d->count = 
(range->last == ~0u) ? ~0u : range->last - range->first + 1; - ++info->descriptor_count; - -- return true; --} -- --static bool vkd3d_shader_scan_add_uav_range(struct vkd3d_shader_scan_context *context, -- unsigned int id, unsigned int descriptor_idx) --{ -- if (!vkd3d_array_reserve((void **)&context->uav_ranges, &context->uav_ranges_size, -- context->uav_range_count + 1, sizeof(*context->uav_ranges))) -- { -- ERR("Failed to allocate UAV range.\n"); -- return false; -- } -- -- context->uav_ranges[context->uav_range_count].id = id; -- context->uav_ranges[context->uav_range_count].descriptor_idx = descriptor_idx; -- ++context->uav_range_count; -- -- return true; -+ return d; - } - - static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_instruction *instruction) - { - const struct vkd3d_shader_constant_buffer *cb = &instruction->declaration.cb; -+ struct vkd3d_shader_descriptor_info1 *d; - - if (!context->scan_descriptor_info) - return; - -- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->range, -- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); -+ if (!(d = vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, -+ &cb->src.reg, &cb->range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) -+ return; -+ d->buffer_size = cb->size * 16; - } - - static void vkd3d_shader_scan_sampler_declaration(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_instruction *instruction) - { - const struct vkd3d_shader_sampler *sampler = &instruction->declaration.sampler; -- unsigned int flags; -+ struct vkd3d_shader_descriptor_info1 *d; - - if (!context->scan_descriptor_info) - return; - -+ if (!(d = vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, -+ &sampler->src.reg, &sampler->range, VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT))) -+ return; -+ - if 
(instruction->flags & VKD3DSI_SAMPLER_COMPARISON_MODE) -- flags = VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; -- else -- flags = 0; -- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->range, -- VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT, flags); -+ d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; - } - - static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_context *context, - const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type, -- enum vkd3d_shader_resource_data_type resource_data_type) -+ enum vkd3d_shader_resource_data_type resource_data_type, -+ unsigned int sample_count, unsigned int structure_stride, bool raw) - { -+ struct vkd3d_shader_descriptor_info1 *d; - enum vkd3d_shader_descriptor_type type; - - if (!context->scan_descriptor_info) -@@ -798,10 +832,13 @@ static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_cont - type = VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; - else - type = VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; -- vkd3d_shader_scan_add_descriptor(context, type, &resource->range, resource_type, resource_data_type, 0); -- if (type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -- vkd3d_shader_scan_add_uav_range(context, resource->reg.reg.idx[0].offset, -- context->scan_descriptor_info->descriptor_count - 1); -+ if (!(d = vkd3d_shader_scan_add_descriptor(context, type, &resource->reg.reg, -+ &resource->range, resource_type, resource_data_type))) -+ return; -+ d->sample_count = sample_count; -+ d->structure_stride = structure_stride; -+ if (raw) -+ d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER; - } - - static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_scan_context *context, -@@ -860,7 +897,7 @@ static void vkd3d_shader_scan_typed_resource_declaration(struct vkd3d_shader_sca - } - - vkd3d_shader_scan_resource_declaration(context, &semantic->resource, -- 
semantic->resource_type, resource_data_type); -+ semantic->resource_type, resource_data_type, semantic->sample_count, 0, false); - } - - static void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, -@@ -894,12 +931,13 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte - case VKD3DSIH_DCL_RESOURCE_RAW: - case VKD3DSIH_DCL_UAV_RAW: - vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.raw_resource.resource, -- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); -+ VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, 0, true); - break; - case VKD3DSIH_DCL_RESOURCE_STRUCTURED: - case VKD3DSIH_DCL_UAV_STRUCTURED: - vkd3d_shader_scan_resource_declaration(context, &instruction->declaration.structured_resource.resource, -- VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT); -+ VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, -+ instruction->declaration.structured_resource.byte_stride, false); - break; - case VKD3DSIH_IF: - cf_info = vkd3d_shader_scan_push_cf_info(context); -@@ -1051,39 +1089,120 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte - return VKD3D_OK; - } - -+static enum vkd3d_result convert_descriptor_info(struct vkd3d_shader_scan_descriptor_info *info, -+ const struct vkd3d_shader_scan_descriptor_info1 *info1) -+{ -+ unsigned int i; -+ -+ if (!(info->descriptors = vkd3d_calloc(info1->descriptor_count, sizeof(*info->descriptors)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ for (i = 0; i < info1->descriptor_count; ++i) -+ { -+ const struct vkd3d_shader_descriptor_info1 *src = &info1->descriptors[i]; -+ struct vkd3d_shader_descriptor_info *dst = &info->descriptors[i]; -+ -+ dst->type = src->type; -+ dst->register_space = src->register_space; -+ dst->register_index = src->register_index; -+ dst->resource_type = src->resource_type; -+ dst->resource_data_type = src->resource_data_type; -+ dst->flags = 
src->flags; -+ dst->count = src->count; -+ } -+ info->descriptor_count = info1->descriptor_count; -+ -+ return VKD3D_OK; -+} -+ -+static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info) -+{ -+ TRACE("scan_descriptor_info %p.\n", scan_descriptor_info); -+ -+ vkd3d_free(scan_descriptor_info->descriptors); -+} -+ - static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser *parser) -+ struct vkd3d_shader_message_context *message_context, -+ struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) - { -- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; -+ struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; -+ struct vkd3d_shader_scan_descriptor_info *descriptor_info; -+ struct vkd3d_shader_scan_signature_info *signature_info; - struct vkd3d_shader_instruction *instruction; - struct vkd3d_shader_scan_context context; - int ret = VKD3D_OK; - unsigned int i; - -- if ((scan_descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO))) -+ descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO); -+ if (descriptor_info1) - { -- scan_descriptor_info->descriptors = NULL; -- scan_descriptor_info->descriptor_count = 0; -+ descriptor_info1->descriptors = NULL; -+ descriptor_info1->descriptor_count = 0; - } -+ else if (descriptor_info) -+ { -+ descriptor_info1 = &local_descriptor_info1; -+ } -+ signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO); - -- vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); -+ vkd3d_shader_scan_context_init(&context, compile_info, descriptor_info1, message_context); - - if (TRACE_ON()) - { -- vkd3d_shader_trace(parser); -+ vkd3d_shader_trace(&parser->instructions, &parser->shader_version); - } - - for (i = 0; 
i < parser->instructions.count; ++i) - { - instruction = &parser->instructions.elements[i]; - if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) -- { -- if (scan_descriptor_info) -- vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); - break; -+ } -+ -+ for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) -+ { -+ unsigned int size = parser->shader_desc.flat_constant_count[i].external; -+ struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; -+ struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; -+ struct vkd3d_shader_descriptor_info1 *d; -+ -+ if (parser->shader_desc.flat_constant_count[i].external) -+ { -+ if ((d = vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ®, -+ &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) -+ d->buffer_size = size * 16; - } - } - -+ if (!ret && signature_info) -+ { -+ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) -+ || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, -+ &parser->shader_desc.output_signature) -+ || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, -+ &parser->shader_desc.patch_constant_signature)) -+ { -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ } -+ -+ if (!ret && descriptor_info) -+ ret = convert_descriptor_info(descriptor_info, descriptor_info1); -+ -+ if (ret < 0) -+ { -+ if (descriptor_info) -+ vkd3d_shader_free_scan_descriptor_info(descriptor_info); -+ if (descriptor_info1) -+ vkd3d_shader_free_scan_descriptor_info1(descriptor_info1); -+ if (signature_info) -+ vkd3d_shader_free_scan_signature_info(signature_info); -+ } -+ else -+ { -+ vkd3d_shader_free_scan_descriptor_info1(&local_descriptor_info1); -+ } - vkd3d_shader_scan_context_cleanup(&context); - return ret; - } -@@ -1100,7 +1219,7 @@ static int scan_dxbc(const struct vkd3d_shader_compile_info 
*compile_info, - return ret; - } - -- ret = scan_with_parser(compile_info, message_context, parser); -+ ret = scan_with_parser(compile_info, message_context, NULL, parser); - vkd3d_shader_parser_destroy(parser); - - return ret; -@@ -1118,7 +1237,25 @@ static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, - return ret; - } - -- ret = scan_with_parser(compile_info, message_context, parser); -+ ret = scan_with_parser(compile_info, message_context, NULL, parser); -+ vkd3d_shader_parser_destroy(parser); -+ -+ return ret; -+} -+ -+static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_shader_parser *parser; -+ int ret; -+ -+ if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) -+ { -+ WARN("Failed to initialise shader parser.\n"); -+ return ret; -+ } -+ -+ ret = scan_with_parser(compile_info, message_context, NULL, parser); - vkd3d_shader_parser_destroy(parser); - - return ret; -@@ -1137,6 +1274,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - if ((ret = vkd3d_shader_validate_compile_info(compile_info, false)) < 0) - return ret; - -+ init_scan_signature_info(compile_info); -+ - vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - - switch (compile_info->source_type) -@@ -1154,6 +1293,10 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - ret = scan_d3dbc(compile_info, &message_context); - break; - -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = scan_dxil(compile_info, &message_context); -+ break; -+ - default: - ERR("Unsupported source type %#x.\n", compile_info->source_type); - ret = VKD3D_ERROR_INVALID_ARGUMENT; -@@ -1167,75 +1310,70 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - return ret; - } - --static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, 
-+static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { -- struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; -+ struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; -+ struct vkd3d_glsl_generator *glsl_generator; - struct vkd3d_shader_compile_info scan_info; -- struct spirv_compiler *spirv_compiler; -- struct vkd3d_shader_parser *parser; - int ret; - -+ vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); -+ - scan_info = *compile_info; -- scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; -- scan_descriptor_info.next = scan_info.next; -- scan_info.next = &scan_descriptor_info; - -- if ((ret = scan_dxbc(&scan_info, message_context)) < 0) -+ if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) - return ret; - -- if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) -+ switch (compile_info->target_type) - { -- WARN("Failed to initialise shader parser.\n"); -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -- return ret; -- } -+ case VKD3D_SHADER_TARGET_D3D_ASM: -+ ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); -+ break; - -- vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); -+ case VKD3D_SHADER_TARGET_GLSL: -+ if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, -+ message_context, &parser->location))) -+ { -+ ERR("Failed to create GLSL generator.\n"); -+ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); -+ return VKD3D_ERROR; -+ } - -- if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) -- { -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); 
-- ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); -- vkd3d_shader_parser_destroy(parser); -- return ret; -- } -+ ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); -+ vkd3d_glsl_generator_destroy(glsl_generator); -+ break; - -- if (compile_info->target_type == VKD3D_SHADER_TARGET_GLSL) -- { -- struct vkd3d_glsl_generator *glsl_generator; -+ case VKD3D_SHADER_TARGET_SPIRV_BINARY: -+ case VKD3D_SHADER_TARGET_SPIRV_TEXT: -+ ret = spirv_compile(parser, &scan_descriptor_info, compile_info, out, message_context); -+ break; - -- if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, -- message_context, &parser->location))) -- { -- ERR("Failed to create GLSL generator.\n"); -- vkd3d_shader_parser_destroy(parser); -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -- return VKD3D_ERROR; -- } -+ default: -+ /* Validation should prevent us from reaching this. */ -+ assert(0); -+ } - -- ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); -+ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); -+ return ret; -+} - -- vkd3d_glsl_generator_destroy(glsl_generator); -- vkd3d_shader_parser_destroy(parser); -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -- return ret; -- } -+static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_shader_parser *parser; -+ int ret; - -- if (!(spirv_compiler = spirv_compiler_create(&parser->shader_version, &parser->shader_desc, -- compile_info, &scan_descriptor_info, message_context, &parser->location))) -+ if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) - { -- ERR("Failed to create DXBC compiler.\n"); -- vkd3d_shader_parser_destroy(parser); -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); -- return VKD3D_ERROR; -+ WARN("Failed to initialise shader 
parser.\n"); -+ return ret; - } - -- ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); -+ ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); - -- spirv_compiler_destroy(spirv_compiler); - vkd3d_shader_parser_destroy(parser); -- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return ret; - } - -@@ -1270,7 +1408,7 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ - - if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) - { -- ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); -+ ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); - vkd3d_shader_parser_destroy(parser); - return ret; - } -@@ -1278,6 +1416,24 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ - return VKD3D_ERROR; - } - -+static int compile_dxbc_dxil(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_shader_parser *parser; -+ int ret; -+ -+ if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) -+ { -+ WARN("Failed to initialise shader parser.\n"); -+ return ret; -+ } -+ -+ ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); -+ -+ vkd3d_shader_parser_destroy(parser); -+ return ret; -+} -+ - int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, char **messages) - { -@@ -1292,6 +1448,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - if ((ret = vkd3d_shader_validate_compile_info(compile_info, true)) < 0) - return ret; - -+ init_scan_signature_info(compile_info); -+ - vkd3d_shader_message_context_init(&message_context, compile_info->log_level); - - switch (compile_info->source_type) -@@ -1308,6 +1466,10 @@ int vkd3d_shader_compile(const struct 
vkd3d_shader_compile_info *compile_info, - ret = compile_d3d_bytecode(compile_info, out, &message_context); - break; - -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = compile_dxbc_dxil(compile_info, out, &message_context); -+ break; -+ - default: - vkd3d_unreachable(); - } -@@ -1326,6 +1488,15 @@ void vkd3d_shader_free_scan_descriptor_info(struct vkd3d_shader_scan_descriptor_ - vkd3d_free(scan_descriptor_info->descriptors); - } - -+void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info) -+{ -+ TRACE("info %p.\n", info); -+ -+ vkd3d_shader_free_shader_signature(&info->input); -+ vkd3d_shader_free_shader_signature(&info->output); -+ vkd3d_shader_free_shader_signature(&info->patch_constant); -+} -+ - void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *shader_code) - { - TRACE("shader_code %p.\n", shader_code); -@@ -1388,10 +1559,17 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu - desc->version = 0; - } - -+void shader_signature_cleanup(struct shader_signature *signature) -+{ -+ vkd3d_free(signature->elements); -+ signature->elements = NULL; -+} -+ - int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_signature *signature, char **messages) - { - struct vkd3d_shader_message_context message_context; -+ struct shader_signature shader_signature; - int ret; - - TRACE("dxbc {%p, %zu}, signature %p, messages %p.\n", dxbc->code, dxbc->size, signature, messages); -@@ -1400,13 +1578,17 @@ int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - *messages = NULL; - vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); - -- ret = shader_parse_input_signature(dxbc, &message_context, signature); -+ ret = shader_parse_input_signature(dxbc, &message_context, &shader_signature); - vkd3d_shader_message_context_trace_messages(&message_context); - if (!vkd3d_shader_message_context_copy_messages(&message_context, 
messages)) - ret = VKD3D_ERROR_OUT_OF_MEMORY; - - vkd3d_shader_message_context_cleanup(&message_context); - -+ if (!vkd3d_shader_signature_from_shader_signature(signature, &shader_signature)) -+ ret = VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ shader_signature_cleanup(&shader_signature); - return ret; - } - -@@ -1465,6 +1647,9 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns - VKD3D_SHADER_SOURCE_DXBC_TPF, - VKD3D_SHADER_SOURCE_HLSL, - VKD3D_SHADER_SOURCE_D3D_BYTECODE, -+#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL -+ VKD3D_SHADER_SOURCE_DXBC_DXIL, -+#endif - }; - - TRACE("count %p.\n", count); -@@ -1503,6 +1688,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - - switch (source_type) - { -+#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+#endif - case VKD3D_SHADER_SOURCE_DXBC_TPF: - *count = ARRAY_SIZE(dxbc_tpf_types); - return dxbc_tpf_types; -@@ -1588,9 +1776,6 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, - { - void *params; - -- if (!count) -- return NULL; -- - if (count > allocator->count - allocator->index) - { - struct vkd3d_shader_param_node *next = shader_param_allocator_node_create(allocator); -@@ -1642,6 +1827,84 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins - return true; - } - -+static struct vkd3d_shader_src_param *shader_instruction_array_clone_src_params( -+ struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_src_param *params, -+ unsigned int count); -+ -+static bool shader_register_clone_relative_addresses(struct vkd3d_shader_register *reg, -+ struct vkd3d_shader_instruction_array *instructions) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < reg->idx_count; ++i) -+ { -+ if (!reg->idx[i].rel_addr) -+ continue; -+ -+ if (!(reg->idx[i].rel_addr = shader_instruction_array_clone_src_params(instructions, reg->idx[i].rel_addr, 1))) -+ return false; -+ } -+ -+ return true; -+} 
-+ -+static struct vkd3d_shader_dst_param *shader_instruction_array_clone_dst_params( -+ struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_dst_param *params, -+ unsigned int count) -+{ -+ struct vkd3d_shader_dst_param *dst_params; -+ unsigned int i; -+ -+ if (!(dst_params = shader_dst_param_allocator_get(&instructions->dst_params, count))) -+ return NULL; -+ -+ memcpy(dst_params, params, count * sizeof(*params)); -+ for (i = 0; i < count; ++i) -+ { -+ if (!shader_register_clone_relative_addresses(&dst_params[i].reg, instructions)) -+ return NULL; -+ } -+ -+ return dst_params; -+} -+ -+static struct vkd3d_shader_src_param *shader_instruction_array_clone_src_params( -+ struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_src_param *params, -+ unsigned int count) -+{ -+ struct vkd3d_shader_src_param *src_params; -+ unsigned int i; -+ -+ if (!(src_params = shader_src_param_allocator_get(&instructions->src_params, count))) -+ return NULL; -+ -+ memcpy(src_params, params, count * sizeof(*params)); -+ for (i = 0; i < count; ++i) -+ { -+ if (!shader_register_clone_relative_addresses(&src_params[i].reg, instructions)) -+ return NULL; -+ } -+ -+ return src_params; -+} -+ -+/* NOTE: Immediate constant buffers are not cloned, so the source must not be destroyed while the -+ * destination is in use. This seems like a reasonable requirement given how this is currently used. 
*/ -+bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, -+ unsigned int dst, unsigned int src) -+{ -+ struct vkd3d_shader_instruction *ins = &instructions->elements[dst]; -+ -+ *ins = instructions->elements[src]; -+ -+ if (ins->dst_count && ins->dst && !(ins->dst = shader_instruction_array_clone_dst_params(instructions, -+ ins->dst, ins->dst_count))) -+ return false; -+ -+ return !ins->src_count || !!(ins->src = shader_instruction_array_clone_src_params(instructions, -+ ins->src, ins->src_count)); -+} -+ - void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions) - { - unsigned int i; -@@ -1653,3 +1916,41 @@ void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *ins - vkd3d_free(instructions->icbs[i]); - vkd3d_free(instructions->icbs); - } -+ -+void vkd3d_shader_build_varying_map(const struct vkd3d_shader_signature *output_signature, -+ const struct vkd3d_shader_signature *input_signature, -+ unsigned int *ret_count, struct vkd3d_shader_varying_map *varyings) -+{ -+ unsigned int count = 0; -+ unsigned int i; -+ -+ TRACE("output_signature %p, input_signature %p, ret_count %p, varyings %p.\n", -+ output_signature, input_signature, ret_count, varyings); -+ -+ for (i = 0; i < input_signature->element_count; ++i) -+ { -+ const struct vkd3d_shader_signature_element *input_element, *output_element; -+ -+ input_element = &input_signature->elements[i]; -+ -+ if (input_element->sysval_semantic != VKD3D_SHADER_SV_NONE) -+ continue; -+ -+ varyings[count].input_register_index = input_element->register_index; -+ varyings[count].input_mask = input_element->mask; -+ -+ if ((output_element = vkd3d_shader_find_signature_element(output_signature, -+ input_element->semantic_name, input_element->semantic_index, 0))) -+ { -+ varyings[count].output_signature_index = output_element - output_signature->elements; -+ } -+ else -+ { -+ varyings[count].output_signature_index = 
output_signature->element_count; -+ } -+ -+ ++count; -+ } -+ -+ *ret_count = count; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 79be999adf9..eab1c730ae9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -49,7 +49,7 @@ - #include "vkd3d_common.h" - #include "vkd3d_memory.h" - #include "vkd3d_shader.h" --#include "wine/list.h" -+#include "list.h" - - #include - #include -@@ -74,6 +74,18 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF = 1000, - VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE = 1001, - VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY = 1002, -+ VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_INDEX_COUNT = 1003, -+ VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS = 1004, -+ VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER = 1005, -+ VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL = 1006, -+ VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE = 1007, -+ VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION = 1008, -+ VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE = 1009, -+ -+ VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, -+ VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301, -+ VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_MASK = 1302, -+ VKD3D_SHADER_WARNING_TPF_UNHANDLED_REGISTER_SWIZZLE = 1303, - - VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND = 2000, - VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE = 2001, -@@ -81,6 +93,8 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_IDX_UNSUPPORTED = 2003, - VKD3D_SHADER_ERROR_SPV_STENCIL_EXPORT_UNSUPPORTED = 2004, - -+ VKD3D_SHADER_WARNING_SPV_INVALID_SWIZZLE = 2300, -+ - VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY = 3000, - VKD3D_SHADER_ERROR_RS_INVALID_VERSION = 3001, - VKD3D_SHADER_ERROR_RS_INVALID_ROOT_PARAMETER_TYPE = 3002, -@@ -125,10 +139,15 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT = 5023, - 
VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, - VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, -+ VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, -+ VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027, - - VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, - VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, - VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE = 5302, -+ VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, -+ VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, -+ VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305, - - VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000, - -@@ -137,8 +156,33 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE = 7002, - VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE = 7003, - VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, -+ VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, -+ VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, - - VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, -+ -+ VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY = 8000, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE = 8001, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET = 8002, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE = 8003, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE = 8004, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND_COUNT = 8005, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_TABLE = 8006, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_VALUE_SYMTAB = 8007, -+ VKD3D_SHADER_ERROR_DXIL_UNSUPPORTED_BITCODE_FORMAT = 8008, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_FUNCTION_DCL = 8009, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_TYPE_ID = 8010, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_MODULE = 8011, -+ VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND = 8012, -+ -+ VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER = 8300, -+ VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE = 8301, -+ VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH = 8302, -+ VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH = 8303, -+ VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS = 8304, -+ 
VKD3D_SHADER_WARNING_DXIL_UNHANDLED_INTRINSIC = 8305, -+ -+ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, - }; - - enum vkd3d_shader_opcode -@@ -223,6 +267,7 @@ enum vkd3d_shader_opcode - VKD3DSIH_DEQ, - VKD3DSIH_DFMA, - VKD3DSIH_DGE, -+ VKD3DSIH_DISCARD, - VKD3DSIH_DIV, - VKD3DSIH_DLT, - VKD3DSIH_DMAX, -@@ -477,6 +522,9 @@ enum vkd3d_shader_register_type - VKD3DSPR_DEPTHOUTLE, - VKD3DSPR_RASTERIZER, - VKD3DSPR_OUTSTENCILREF, -+ VKD3DSPR_UNDEF, -+ -+ VKD3DSPR_COUNT, - - VKD3DSPR_INVALID = ~0u, - }; -@@ -507,8 +555,14 @@ enum vkd3d_data_type - VKD3D_DATA_DOUBLE, - VKD3D_DATA_CONTINUED, - VKD3D_DATA_UNUSED, -+ VKD3D_DATA_UINT8, - }; - -+static inline bool data_type_is_integer(enum vkd3d_data_type data_type) -+{ -+ return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT; -+} -+ - enum vkd3d_immconst_type - { - VKD3D_IMMCONST_SCALAR, -@@ -675,6 +729,7 @@ struct vkd3d_shader_register - bool non_uniform; - enum vkd3d_data_type data_type; - struct vkd3d_shader_register_index idx[3]; -+ unsigned int idx_count; - enum vkd3d_immconst_type immconst_type; - union - { -@@ -686,6 +741,9 @@ struct vkd3d_shader_register - } u; - }; - -+void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, -+ enum vkd3d_data_type data_type, unsigned int idx_count); -+ - struct vkd3d_shader_dst_param - { - struct vkd3d_shader_register reg; -@@ -774,13 +832,51 @@ enum vkd3d_shader_input_sysval_semantic - VKD3D_SIV_LINE_DENSITY_TESS_FACTOR = 22, - }; - -+#define SIGNATURE_TARGET_LOCATION_UNUSED (~0u) -+ -+struct signature_element -+{ -+ unsigned int sort_index; -+ const char *semantic_name; -+ unsigned int semantic_index; -+ unsigned int stream_index; -+ enum vkd3d_shader_sysval_semantic sysval_semantic; -+ enum vkd3d_shader_component_type component_type; -+ /* Register index in the source shader. 
*/ -+ unsigned int register_index; -+ unsigned int register_count; -+ unsigned int mask; -+ unsigned int used_mask; -+ enum vkd3d_shader_minimum_precision min_precision; -+ /* Register index / location in the target shader. -+ * If SIGNATURE_TARGET_LOCATION_UNUSED, this element should not be written. */ -+ unsigned int target_location; -+}; -+ -+struct shader_signature -+{ -+ struct signature_element *elements; -+ size_t elements_capacity; -+ unsigned int element_count; -+}; -+ -+void shader_signature_cleanup(struct shader_signature *signature); -+ - struct vkd3d_shader_desc - { - const uint32_t *byte_code; - size_t byte_code_size; -- struct vkd3d_shader_signature input_signature; -- struct vkd3d_shader_signature output_signature; -- struct vkd3d_shader_signature patch_constant_signature; -+ bool is_dxil; -+ struct shader_signature input_signature; -+ struct shader_signature output_signature; -+ struct shader_signature patch_constant_signature; -+ -+ uint32_t temp_count; -+ -+ struct -+ { -+ uint32_t used, external; -+ } flat_constant_count[3]; - }; - - struct vkd3d_shader_register_semantic -@@ -912,6 +1008,8 @@ struct vkd3d_shader_instruction - } declaration; - }; - -+void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx); -+ - static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) - { - return ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w; -@@ -927,6 +1025,16 @@ static inline bool vkd3d_shader_register_is_output(const struct vkd3d_shader_reg - return reg->type == VKD3DSPR_OUTPUT || reg->type == VKD3DSPR_COLOROUT; - } - -+static inline bool vkd3d_shader_register_is_patch_constant(const struct vkd3d_shader_register *reg) -+{ -+ return reg->type == VKD3DSPR_PATCHCONST; -+} -+ -+static inline bool register_is_constant(const struct vkd3d_shader_register *reg) -+{ -+ return (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_IMMCONST64); -+} -+ - 
struct vkd3d_shader_location - { - const char *source_name; -@@ -981,6 +1089,8 @@ bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instru - bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *instructions, unsigned int reserve); - bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *instructions, - struct vkd3d_shader_immediate_constant_buffer *icb); -+bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, -+ unsigned int dst, unsigned int src); - void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); - - struct vkd3d_shader_parser -@@ -991,10 +1101,8 @@ struct vkd3d_shader_parser - - struct vkd3d_shader_desc shader_desc; - struct vkd3d_shader_version shader_version; -- const uint32_t *ptr; - const struct vkd3d_shader_parser_ops *ops; - struct vkd3d_shader_instruction_array instructions; -- size_t instruction_idx; - }; - - struct vkd3d_shader_parser_ops -@@ -1028,7 +1136,29 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse - parser->ops->parser_destroy(parser); - } - --void vkd3d_shader_trace(struct vkd3d_shader_parser *parser); -+struct vkd3d_shader_descriptor_info1 -+{ -+ enum vkd3d_shader_descriptor_type type; -+ unsigned int register_space; -+ unsigned int register_index; -+ unsigned int register_id; -+ enum vkd3d_shader_resource_type resource_type; -+ enum vkd3d_shader_resource_data_type resource_data_type; -+ unsigned int flags; -+ unsigned int sample_count; -+ unsigned int buffer_size; -+ unsigned int structure_stride; -+ unsigned int count; -+}; -+ -+struct vkd3d_shader_scan_descriptor_info1 -+{ -+ struct vkd3d_shader_descriptor_info1 *descriptors; -+ unsigned int descriptor_count; -+}; -+ -+void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, -+ const struct vkd3d_shader_version *shader_version); - - const char *shader_get_type_prefix(enum 
vkd3d_shader_type type); - -@@ -1044,8 +1174,9 @@ struct vkd3d_string_buffer_cache - size_t count, max_count, capacity; - }; - --enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, -- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out); -+enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vkd3d_shader_instruction_array *instructions, -+ const struct vkd3d_shader_version *shader_version, const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out); - void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); - struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); - void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); -@@ -1067,6 +1198,8 @@ struct vkd3d_bytecode_buffer - int status; - }; - -+/* Align to the next 4-byte offset, and return that offset. */ -+size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer); - size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size); - void set_u32(struct vkd3d_bytecode_buffer *buffer, size_t offset, uint32_t value); - -@@ -1125,11 +1258,15 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); - int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); -+int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); - - void free_shader_desc(struct vkd3d_shader_desc *desc); - -+int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, -+ struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc); - int 
shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, -- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature); -+ struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); - - struct vkd3d_glsl_generator; - -@@ -1141,16 +1278,10 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); - - #define SPIRV_MAX_SRC_COUNT 6 - --struct spirv_compiler; -- --struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, -- const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, -- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, -- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); --int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, -- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, -- struct vkd3d_shader_code *spirv); --void spirv_compiler_destroy(struct spirv_compiler *compiler); -+int spirv_compile(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, -+ const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - - void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]); - -@@ -1202,6 +1333,38 @@ static inline enum vkd3d_data_type vkd3d_data_type_from_component_type( - } - } - -+static inline enum vkd3d_shader_component_type vkd3d_component_type_from_resource_data_type( -+ enum vkd3d_shader_resource_data_type data_type) -+{ -+ switch (data_type) -+ { -+ case VKD3D_SHADER_RESOURCE_DATA_FLOAT: -+ case VKD3D_SHADER_RESOURCE_DATA_UNORM: -+ case VKD3D_SHADER_RESOURCE_DATA_SNORM: -+ return VKD3D_SHADER_COMPONENT_FLOAT; -+ case VKD3D_SHADER_RESOURCE_DATA_UINT: -+ return VKD3D_SHADER_COMPONENT_UINT; 
-+ case VKD3D_SHADER_RESOURCE_DATA_INT: -+ return VKD3D_SHADER_COMPONENT_INT; -+ case VKD3D_SHADER_RESOURCE_DATA_DOUBLE: -+ case VKD3D_SHADER_RESOURCE_DATA_CONTINUED: -+ return VKD3D_SHADER_COMPONENT_DOUBLE; -+ default: -+ FIXME("Unhandled data type %#x.\n", data_type); -+ /* fall-through */ -+ case VKD3D_SHADER_RESOURCE_DATA_MIXED: -+ return VKD3D_SHADER_COMPONENT_UINT; -+ } -+} -+ -+enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, -+ unsigned int index); -+ -+static inline enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_shader_sysval_semantic sysval) -+{ -+ return vkd3d_siv_from_sysval_indexed(sysval, 0); -+} -+ - static inline unsigned int vkd3d_write_mask_get_component_idx(DWORD write_mask) - { - unsigned int i; -@@ -1293,6 +1456,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, - } - - #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) -+#define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) - - #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') - #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') -@@ -1323,4 +1487,7 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void - void dxbc_writer_init(struct dxbc_writer *dxbc); - int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); - -+enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, -+ const struct vkd3d_shader_compile_info *compile_info); -+ - #endif /* __VKD3D_SHADER_PRIVATE_H */ -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 4e2d4295935..42a98763438 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -26,6 +26,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF - static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value); - static HRESULT 
d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, - struct d3d12_fence *fence, uint64_t value); -+static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue); - static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); - static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any); - -@@ -453,9 +454,9 @@ static const struct d3d12_root_parameter *root_signature_get_root_descriptor( - } - - /* ID3D12Fence */ --static struct d3d12_fence *impl_from_ID3D12Fence(ID3D12Fence *iface) -+static struct d3d12_fence *impl_from_ID3D12Fence1(ID3D12Fence1 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_fence, ID3D12Fence1_iface); - } - - static VkResult d3d12_fence_create_vk_fence(struct d3d12_fence *fence, VkFence *vk_fence) -@@ -899,18 +900,19 @@ static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uin - vkd3d_mutex_unlock(&fence->mutex); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence1 *iface, - REFIID riid, void **object) - { - TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); - -- if (IsEqualGUID(riid, &IID_ID3D12Fence) -+ if (IsEqualGUID(riid, &IID_ID3D12Fence1) -+ || IsEqualGUID(riid, &IID_ID3D12Fence) - || IsEqualGUID(riid, &IID_ID3D12Pageable) - || IsEqualGUID(riid, &IID_ID3D12DeviceChild) - || IsEqualGUID(riid, &IID_ID3D12Object) - || IsEqualGUID(riid, &IID_IUnknown)) - { -- ID3D12Fence_AddRef(iface); -+ ID3D12Fence1_AddRef(iface); - *object = iface; - return S_OK; - } -@@ -921,9 +923,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_QueryInterface(ID3D12Fence *iface, - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence *iface) -+static ULONG 
STDMETHODCALLTYPE d3d12_fence_AddRef(ID3D12Fence1 *iface) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - ULONG refcount = InterlockedIncrement(&fence->refcount); - - TRACE("%p increasing refcount to %u.\n", fence, refcount); -@@ -936,9 +938,9 @@ static void d3d12_fence_incref(struct d3d12_fence *fence) - InterlockedIncrement(&fence->internal_refcount); - } - --static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence *iface) -+static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence1 *iface) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - ULONG refcount = InterlockedDecrement(&fence->refcount); - - TRACE("%p decreasing refcount to %u.\n", fence, refcount); -@@ -971,10 +973,10 @@ static void d3d12_fence_decref(struct d3d12_fence *fence) - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence1 *iface, - REFGUID guid, UINT *data_size, void *data) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -982,10 +984,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_GetPrivateData(ID3D12Fence *iface, - return vkd3d_get_private_data(&fence->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence1 *iface, - REFGUID guid, UINT data_size, const void *data) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", - iface, debugstr_guid(guid), data_size, 
data); -@@ -993,37 +995,37 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateData(ID3D12Fence *iface, - return vkd3d_set_private_data(&fence->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_SetPrivateDataInterface(ID3D12Fence1 *iface, - REFGUID guid, const IUnknown *data) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&fence->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_fence_SetName(ID3D12Fence1 *iface, const WCHAR *name) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, fence->device->wchar_size)); - - return name ? 
S_OK : E_INVALIDARG; - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence *iface, REFIID iid, void **device) -+static HRESULT STDMETHODCALLTYPE d3d12_fence_GetDevice(ID3D12Fence1 *iface, REFIID iid, void **device) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); - - return d3d12_device_query_interface(fence->device, iid, device); - } - --static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface) -+static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence1 *iface) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - uint64_t completed_value; - - TRACE("iface %p.\n", iface); -@@ -1034,10 +1036,10 @@ static UINT64 STDMETHODCALLTYPE d3d12_fence_GetCompletedValue(ID3D12Fence *iface - return completed_value; - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence1 *iface, - UINT64 value, HANDLE event) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - unsigned int i; - bool latch = false; - -@@ -1105,9 +1107,9 @@ static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fen - return d3d12_device_flush_blocked_queues(fence->device); - } - --static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value) -+static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence1 *iface, UINT64 value) - { -- struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); - - TRACE("iface %p, value %#"PRIx64".\n", iface, value); - -@@ -1116,7 +1118,16 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_fence_Signal(ID3D12Fence *iface, UINT64 v - return d3d12_fence_signal(fence, value, VK_NULL_HANDLE, true); - } - --static const struct ID3D12FenceVtbl d3d12_fence_vtbl = -+static D3D12_FENCE_FLAGS STDMETHODCALLTYPE d3d12_fence_GetCreationFlags(ID3D12Fence1 *iface) -+{ -+ struct d3d12_fence *fence = impl_from_ID3D12Fence1(iface); -+ -+ TRACE("iface %p.\n", iface); -+ -+ return fence->flags; -+} -+ -+static const struct ID3D12Fence1Vtbl d3d12_fence_vtbl = - { - /* IUnknown methods */ - d3d12_fence_QueryInterface, -@@ -1133,14 +1144,18 @@ static const struct ID3D12FenceVtbl d3d12_fence_vtbl = - d3d12_fence_GetCompletedValue, - d3d12_fence_SetEventOnCompletion, - d3d12_fence_Signal, -+ /* ID3D12Fence1 methods */ -+ d3d12_fence_GetCreationFlags, - }; - - static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) - { -- if (!iface) -+ ID3D12Fence1 *iface1; -+ -+ if (!(iface1 = (ID3D12Fence1 *)iface)) - return NULL; -- assert(iface->lpVtbl == &d3d12_fence_vtbl); -- return impl_from_ID3D12Fence(iface); -+ assert(iface1->lpVtbl == &d3d12_fence_vtbl); -+ return impl_from_ID3D12Fence1(iface1); - } - - static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device, -@@ -1150,7 +1165,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * - VkResult vr; - HRESULT hr; - -- fence->ID3D12Fence_iface.lpVtbl = &d3d12_fence_vtbl; -+ fence->ID3D12Fence1_iface.lpVtbl = &d3d12_fence_vtbl; - fence->internal_refcount = 1; - fence->refcount = 1; - -@@ -1161,7 +1176,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * - - vkd3d_cond_init(&fence->null_event_cond); - -- if (flags) -+ if ((fence->flags = flags)) - FIXME("Ignoring flags %#x.\n", flags); - - fence->events = NULL; -@@ -1315,32 +1330,26 @@ static HRESULT d3d12_command_allocator_allocate_command_buffer(struct d3d12_comm - return hr; - } - -- allocator->current_command_list = list; -- -- return S_OK; --} -- --static void 
d3d12_command_allocator_free_command_buffer(struct d3d12_command_allocator *allocator, -- struct d3d12_command_list *list) --{ -- struct d3d12_device *device = allocator->device; -- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -- -- TRACE("allocator %p, list %p.\n", allocator, list); -- -- if (allocator->current_command_list == list) -- allocator->current_command_list = NULL; -- - if (!vkd3d_array_reserve((void **)&allocator->command_buffers, &allocator->command_buffers_size, - allocator->command_buffer_count + 1, sizeof(*allocator->command_buffers))) - { - WARN("Failed to add command buffer.\n"); - VK_CALL(vkFreeCommandBuffers(device->vk_device, allocator->vk_command_pool, - 1, &list->vk_command_buffer)); -- return; -+ return E_OUTOFMEMORY; - } -- - allocator->command_buffers[allocator->command_buffer_count++] = list->vk_command_buffer; -+ -+ allocator->current_command_list = list; -+ -+ return S_OK; -+} -+ -+static void d3d12_command_allocator_remove_command_list(struct d3d12_command_allocator *allocator, -+ const struct d3d12_command_list *list) -+{ -+ if (allocator->current_command_list == list) -+ allocator->current_command_list = NULL; - } - - static bool d3d12_command_allocator_add_render_pass(struct d3d12_command_allocator *allocator, VkRenderPass pass) -@@ -1437,7 +1446,7 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( - pool_desc.pNext = NULL; - pool_desc.flags = 0; - pool_desc.maxSets = 512; -- pool_desc.poolSizeCount = ARRAY_SIZE(device->vk_pool_sizes); -+ pool_desc.poolSizeCount = device->vk_pool_count; - pool_desc.pPoolSizes = device->vk_pool_sizes; - if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0) - { -@@ -1910,10 +1919,32 @@ HRESULT d3d12_command_allocator_create(struct d3d12_device *device, - return S_OK; - } - -+static void d3d12_command_signature_incref(struct d3d12_command_signature *signature) -+{ -+ vkd3d_atomic_increment(&signature->internal_refcount); 
-+} -+ -+static void d3d12_command_signature_decref(struct d3d12_command_signature *signature) -+{ -+ unsigned int refcount = vkd3d_atomic_decrement(&signature->internal_refcount); -+ -+ if (!refcount) -+ { -+ struct d3d12_device *device = signature->device; -+ -+ vkd3d_private_store_destroy(&signature->private_store); -+ -+ vkd3d_free((void *)signature->desc.pArgumentDescs); -+ vkd3d_free(signature); -+ -+ d3d12_device_release(device); -+ } -+} -+ - /* ID3D12CommandList */ --static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList2(ID3D12GraphicsCommandList2 *iface) -+static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList3(ID3D12GraphicsCommandList3 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); - } - - static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list) -@@ -2259,12 +2290,13 @@ static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *l - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList3 *iface, - REFIID iid, void **object) - { - TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); - -- if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) -+ if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3) -+ || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) - || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList1) - || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList) - || IsEqualGUID(iid, &IID_ID3D12CommandList) -@@ -2272,7 +2304,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic - || IsEqualGUID(iid, &IID_ID3D12Object) - || IsEqualGUID(iid, &IID_IUnknown)) - { -- 
ID3D12GraphicsCommandList2_AddRef(iface); -+ ID3D12GraphicsCommandList3_AddRef(iface); - *object = iface; - return S_OK; - } -@@ -2283,9 +2315,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList2 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList3 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - ULONG refcount = InterlockedIncrement(&list->refcount); - - TRACE("%p increasing refcount to %u.\n", list, refcount); -@@ -2298,9 +2330,9 @@ static void vkd3d_pipeline_bindings_cleanup(struct vkd3d_pipeline_bindings *bind - vkd3d_free(bindings->vk_uav_counter_views); - } - --static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList2 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList3 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - ULONG refcount = InterlockedDecrement(&list->refcount); - - TRACE("%p decreasing refcount to %u.\n", list, refcount); -@@ -2313,7 +2345,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL - - /* When command pool is destroyed, all command buffers are implicitly freed. 
*/ - if (list->allocator) -- d3d12_command_allocator_free_command_buffer(list->allocator, list); -+ d3d12_command_allocator_remove_command_list(list->allocator, list); - - vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_COMPUTE]); - vkd3d_pipeline_bindings_cleanup(&list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS]); -@@ -2326,66 +2358,66 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL - return refcount; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList3 *iface, - REFGUID guid, UINT *data_size, void *data) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); - - return vkd3d_get_private_data(&list->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList3 *iface, - REFGUID guid, UINT data_size, const void *data) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); - - return vkd3d_set_private_data(&list->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList3 *iface, - REFGUID guid, const IUnknown *data) - { -- struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&list->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList2 *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList3 *iface, const WCHAR *name) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, list->device->wchar_size)); - - return name ? S_OK : E_INVALIDARG; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList2 *iface, REFIID iid, void **device) -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList3 *iface, REFIID iid, void **device) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); - - return d3d12_device_query_interface(list->device, iid, device); - } - --static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList2 *iface) -+static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList3 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p.\n", iface); - - return list->type; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList2 *iface) -+static HRESULT 
STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList3 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - VkResult vr; - -@@ -2411,7 +2443,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL - - if (list->allocator) - { -- d3d12_command_allocator_free_command_buffer(list->allocator, list); -+ d3d12_command_allocator_remove_command_list(list->allocator, list); - list->allocator = NULL; - } - -@@ -2429,7 +2461,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL - static void d3d12_command_list_reset_state(struct d3d12_command_list *list, - ID3D12PipelineState *initial_pipeline_state) - { -- ID3D12GraphicsCommandList2 *iface = &list->ID3D12GraphicsCommandList2_iface; -+ ID3D12GraphicsCommandList3 *iface = &list->ID3D12GraphicsCommandList3_iface; - - memset(list->strides, 0, sizeof(list->strides)); - list->primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; -@@ -2463,14 +2495,16 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, - memset(list->so_counter_buffers, 0, sizeof(list->so_counter_buffers)); - memset(list->so_counter_buffer_offsets, 0, sizeof(list->so_counter_buffer_offsets)); - -- ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); -+ list->descriptor_heap_count = 0; -+ -+ ID3D12GraphicsCommandList3_SetPipelineState(iface, initial_pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList2 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList3 *iface, - ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) - { - struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); -- struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - HRESULT hr; - - TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", -@@ -2497,7 +2531,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandL - return hr; - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList3 *iface, - ID3D12PipelineState *pipeline_state) - { - FIXME("iface %p, pipline_state %p stub!\n", iface, pipeline_state); -@@ -2720,28 +2754,31 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des - unsigned int index, bool use_array) - { - uint32_t descriptor_range_magic = range->descriptor_magic; -- const struct vkd3d_view *view = descriptor->s.u.view_info.view; -+ union d3d12_desc_object u = descriptor->s.u; - uint32_t vk_binding = range->binding; -+ VkDescriptorType vk_descriptor_type; - uint32_t set = range->set; - -- if (descriptor->s.magic != descriptor_range_magic) -+ if (!u.header || u.header->magic != descriptor_range_magic) - return false; - -+ vk_descriptor_type = u.header->vk_descriptor_type; -+ - vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_write->pNext = NULL; - vk_descriptor_write->dstSet = vk_descriptor_sets[set]; - vk_descriptor_write->dstBinding = use_array ? vk_binding : vk_binding + index; - vk_descriptor_write->dstArrayElement = use_array ? 
index : 0; - vk_descriptor_write->descriptorCount = 1; -- vk_descriptor_write->descriptorType = descriptor->s.vk_descriptor_type; -+ vk_descriptor_write->descriptorType = vk_descriptor_type; - vk_descriptor_write->pImageInfo = NULL; - vk_descriptor_write->pBufferInfo = NULL; - vk_descriptor_write->pTexelBufferView = NULL; - -- switch (descriptor->s.magic) -+ switch (u.header->magic) - { - case VKD3D_DESCRIPTOR_MAGIC_CBV: -- vk_descriptor_write->pBufferInfo = &descriptor->s.u.vk_cbv_info; -+ vk_descriptor_write->pBufferInfo = &u.cb_desc->vk_cbv_info; - break; - - case VKD3D_DESCRIPTOR_MAGIC_SRV: -@@ -2752,8 +2789,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des - * in pairs in one set. */ - if (range->descriptor_count == UINT_MAX) - { -- if (descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER -- && descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) -+ if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER -+ && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) - { - vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; - vk_descriptor_write->dstBinding = 0; -@@ -2763,21 +2800,21 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des - { - if (!use_array) - vk_descriptor_write->dstBinding = vk_binding + 2 * index; -- if (descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER -- && descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) -+ if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER -+ && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) - ++vk_descriptor_write->dstBinding; - } - -- if (descriptor->s.vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER -- || descriptor->s.vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) -+ if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER -+ || vk_descriptor_type == 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) - { -- vk_descriptor_write->pTexelBufferView = &view->u.vk_buffer_view; -+ vk_descriptor_write->pTexelBufferView = &u.view->v.u.vk_buffer_view; - } - else - { - vk_image_info->sampler = VK_NULL_HANDLE; -- vk_image_info->imageView = view->u.vk_image_view; -- vk_image_info->imageLayout = descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_SRV -+ vk_image_info->imageView = u.view->v.u.vk_image_view; -+ vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV - ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; - - vk_descriptor_write->pImageInfo = vk_image_info; -@@ -2785,7 +2822,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des - break; - - case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: -- vk_image_info->sampler = view->u.vk_sampler; -+ vk_image_info->sampler = u.view->v.u.vk_sampler; - vk_image_info->imageView = VK_NULL_HANDLE; - vk_image_info->imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; - -@@ -2793,7 +2830,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des - break; - - default: -- ERR("Invalid descriptor %#x.\n", descriptor->s.magic); -+ ERR("Invalid descriptor %#x.\n", u.header->magic); - return false; - } - -@@ -2847,6 +2884,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list - for (j = 0; j < descriptor_count; ++j, ++descriptor) - { - unsigned int register_idx = range->base_register_idx + j; -+ union d3d12_desc_object u = descriptor->s.u; -+ VkBufferView vk_counter_view; -+ -+ vk_counter_view = (u.header && u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV) -+ ? u.view->v.vk_counter_view : VK_NULL_HANDLE; - - /* Track UAV counters. 
*/ - if (range->descriptor_magic == VKD3D_DESCRIPTOR_MAGIC_UAV) -@@ -2856,8 +2898,6 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list - if (state->uav_counters.bindings[k].register_space == range->register_space - && state->uav_counters.bindings[k].register_index == register_idx) - { -- VkBufferView vk_counter_view = descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV -- ? descriptor->s.u.view_info.view->vk_counter_view : VK_NULL_HANDLE; - if (bindings->vk_uav_counter_views[k] != vk_counter_view) - bindings->uav_counters_dirty = true; - bindings->vk_uav_counter_views[k] = vk_counter_view; -@@ -2867,7 +2907,7 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list - } - - /* Not all descriptors are necessarily populated if the range is unbounded. */ -- if (descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_FREE) -+ if (!u.header) - continue; - - if (!vk_write_descriptor_set_from_d3d12_desc(current_descriptor_write, current_image_info, -@@ -3153,6 +3193,47 @@ static void d3d12_command_list_update_descriptor_tables(struct d3d12_command_lis - } - } - -+static bool contains_heap(struct d3d12_descriptor_heap **heap_array, unsigned int count, -+ const struct d3d12_descriptor_heap *query) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < count; ++i) -+ if (heap_array[i] == query) -+ return true; -+ return false; -+} -+ -+static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) -+{ -+ struct d3d12_device *device = list->device; -+ unsigned int i; -+ -+ for (i = 0; i < list->descriptor_heap_count; ++i) -+ { -+ vkd3d_mutex_lock(&list->descriptor_heaps[i]->vk_sets_mutex); -+ d3d12_desc_flush_vk_heap_updates_locked(list->descriptor_heaps[i], device); -+ vkd3d_mutex_unlock(&list->descriptor_heaps[i]->vk_sets_mutex); -+ } -+} -+ -+static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap) -+{ -+ if (!contains_heap(list->descriptor_heaps, 
list->descriptor_heap_count, heap)) -+ { -+ if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) -+ { -+ /* Descriptors can be written after binding. */ -+ FIXME("Flushing descriptor updates while list %p is not closed.\n", list); -+ vkd3d_mutex_lock(&heap->vk_sets_mutex); -+ d3d12_desc_flush_vk_heap_updates_locked(heap, list->device); -+ vkd3d_mutex_unlock(&heap->vk_sets_mutex); -+ return; -+ } -+ list->descriptor_heaps[list->descriptor_heap_count++] = heap; -+ } -+} -+ - static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, - enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) - { -@@ -3177,10 +3258,6 @@ static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *l - bindings->sampler_heap_id = heap->serial_id; - } - -- /* These sets can be shared across multiple command lists, and therefore binding must -- * be synchronised. On an experimental branch in which caching of Vk descriptor writes -- * greatly increased the chance of multiple threads arriving here at the same time, -- * GRID 2019 crashed without the mutex lock. 
*/ - vkd3d_mutex_lock(&heap->vk_sets_mutex); - - for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) -@@ -3313,11 +3390,11 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList3 *iface, - UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location, - UINT start_instance_location) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " -@@ -3337,11 +3414,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCom - instance_count, start_vertex_location, start_instance_location)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList3 *iface, - UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, - INT base_vertex_location, UINT start_instance_location) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, " -@@ -3363,10 +3440,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12Grap - instance_count, start_vertex_location, base_vertex_location, start_instance_location)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList2 *iface, -+static void 
STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList3 *iface, - UINT x, UINT y, UINT z) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); -@@ -3382,10 +3459,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandL - VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_vk_device_procs *vk_procs; - VkBufferCopy buffer_copy; -@@ -3584,7 +3661,7 @@ static HRESULT d3d12_command_list_allocate_transfer_buffer(struct d3d12_command_ - static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_command_list *list, - struct d3d12_resource *dst_resource, unsigned int dst_sub_resource_idx, - const struct vkd3d_format *dst_format, struct d3d12_resource *src_resource, -- unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format) -+ unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format, unsigned int layer_count) - { - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - const D3D12_RESOURCE_DESC *dst_desc = &dst_resource->desc; -@@ -3611,6 +3688,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - buffer_image_copy.bufferImageHeight = 0; - 
vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, - src_format, src_sub_resource_idx, src_desc->MipLevels); -+ buffer_image_copy.imageSubresource.layerCount = layer_count; - src_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; - buffer_image_copy.imageOffset.x = 0; - buffer_image_copy.imageOffset.y = 0; -@@ -3618,7 +3696,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - vk_extent_3d_from_d3d12_miplevel(&buffer_image_copy.imageExtent, src_desc, src_miplevel_idx); - - buffer_size = src_format->byte_count * buffer_image_copy.imageExtent.width * -- buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth; -+ buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth * layer_count; - if (FAILED(hr = d3d12_command_list_allocate_transfer_buffer(list, buffer_size, &transfer_buffer))) - { - ERR("Failed to allocate transfer buffer, hr %#x.\n", hr); -@@ -3644,6 +3722,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - - vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, - dst_format, dst_sub_resource_idx, dst_desc->MipLevels); -+ buffer_image_copy.imageSubresource.layerCount = layer_count; - dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; - - assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == -@@ -3665,11 +3744,11 @@ static bool validate_d3d12_box(const D3D12_BOX *box) - && box->back > box->front; - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList3 *iface, - const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, - const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_format *src_format, *dst_format; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3773,7 +3852,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - { - d3d12_command_list_copy_incompatible_texture_region(list, - dst_resource, dst->u.SubresourceIndex, dst_format, -- src_resource, src->u.SubresourceIndex, src_format); -+ src_resource, src->u.SubresourceIndex, src_format, 1); - return; - } - -@@ -3790,11 +3869,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst, ID3D12Resource *src) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *dst_resource, *src_resource; -+ const struct vkd3d_format *dst_format, *src_format; - const struct vkd3d_vk_device_procs *vk_procs; - VkBufferCopy vk_buffer_copy; - VkImageCopy vk_image_copy; -@@ -3827,16 +3907,29 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - else - { - layer_count = d3d12_resource_desc_get_layer_count(&dst_resource->desc); -+ dst_format = dst_resource->format; -+ src_format = src_resource->format; - - assert(d3d12_resource_is_texture(dst_resource)); - assert(d3d12_resource_is_texture(src_resource)); - assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); - assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); - -+ if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) -+ { -+ for (i = 0; i < dst_resource->desc.MipLevels; ++i) -+ { -+ 
d3d12_command_list_copy_incompatible_texture_region(list, -+ dst_resource, i, dst_format, -+ src_resource, i, src_format, layer_count); -+ } -+ return; -+ } -+ - for (i = 0; i < dst_resource->desc.MipLevels; ++i) - { - vk_image_copy_from_d3d12(&vk_image_copy, i, i, &src_resource->desc, &dst_resource->desc, -- src_resource->format, dst_resource->format, NULL, 0, 0, 0); -+ src_format, dst_format, NULL, 0, 0, 0); - vk_image_copy.dstSubresource.layerCount = layer_count; - vk_image_copy.srcSubresource.layerCount = layer_count; - VK_CALL(vkCmdCopyImage(list->vk_command_buffer, src_resource->u.vk_image, -@@ -3846,7 +3939,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, - const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, - D3D12_TILE_COPY_FLAGS flags) -@@ -3857,11 +3950,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommand - buffer, buffer_offset, flags); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst, UINT dst_sub_resource_idx, - ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_format *src_format, *dst_format, *vk_format; - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3924,10 +4017,10 @@ static void 
STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList3 *iface, - D3D12_PRIMITIVE_TOPOLOGY topology) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, topology %#x.\n", iface, topology); - -@@ -3938,11 +4031,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12Gr - d3d12_command_list_invalidate_current_pipeline(list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList3 *iface, - UINT viewport_count, const D3D12_VIEWPORT *viewports) - { - VkViewport vk_viewports[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; - -@@ -3963,10 +4056,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo - vk_viewports[i].minDepth = viewports[i].MinDepth; - vk_viewports[i].maxDepth = viewports[i].MaxDepth; - -- if (!vk_viewports[i].width || !vk_viewports[i].height) -+ if (vk_viewports[i].width <= 0.0f) - { -- FIXME_ONCE("Invalid viewport %u, ignoring RSSetViewports().\n", i); -- return; -+ /* Vulkan does not support width <= 0 */ -+ FIXME_ONCE("Setting invalid viewport %u to zero height.\n", i); -+ vk_viewports[i].width = 1.0f; -+ vk_viewports[i].height = 0.0f; - } - } - -@@ -3974,10 +4069,10 @@ static void 
STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo - VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, viewport_count, vk_viewports)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList3 *iface, - UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - VkRect2D vk_rects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; -@@ -4002,10 +4097,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12Graphic - VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, rect_count, vk_rects)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList3 *iface, - const FLOAT blend_factor[4]) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); -@@ -4014,10 +4109,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12Graphics - VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, blend_factor)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList3 *iface, - UINT stencil_ref) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); -@@ -4026,11 +4121,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsC - VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, VK_STENCIL_FRONT_AND_BACK, stencil_ref)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList3 *iface, - ID3D12PipelineState *pipeline_state) - { - struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); - -@@ -4081,10 +4176,10 @@ static unsigned int d3d12_find_ds_multiplanar_transition(const D3D12_RESOURCE_BA - return 0; - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList3 *iface, - UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - bool have_aliasing_barriers = false, have_split_barriers = false; - const struct vkd3d_vk_device_procs *vk_procs; - const struct vkd3d_vulkan_info *vk_info; -@@ -4307,13 +4402,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC - WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE 
d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList3 *iface, - ID3D12GraphicsCommandList *command_list) - { - FIXME("iface %p, command_list %p stub!\n", iface, command_list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList3 *iface, - UINT heap_count, ID3D12DescriptorHeap *const *heaps) - { - TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); -@@ -4339,10 +4434,10 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis - d3d12_command_list_invalidate_root_parameters(list, bind_point); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList3 *iface, - ID3D12RootSignature *root_signature) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_signature %p.\n", iface, root_signature); - -@@ -4350,10 +4445,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12G - unsafe_impl_from_ID3D12RootSignature(root_signature)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList3 *iface, - ID3D12RootSignature *root_signature) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_signature %p.\n", iface, root_signature); - -@@ -4366,6 +4461,7 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l - { - struct 
vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; - const struct d3d12_root_signature *root_signature = bindings->root_signature; -+ struct d3d12_descriptor_heap *descriptor_heap; - struct d3d12_desc *desc; - - assert(root_signature_get_descriptor_table(root_signature, index)); -@@ -4376,15 +4472,25 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l - if (bindings->descriptor_tables[index] == desc) - return; - -+ descriptor_heap = d3d12_desc_get_descriptor_heap(desc); -+ if (!(descriptor_heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) -+ { -+ /* GetGPUDescriptorHandleForHeapStart() returns a null handle in this case, -+ * but a CPU handle could be passed. */ -+ WARN("Descriptor heap %p is not shader visible.\n", descriptor_heap); -+ return; -+ } -+ command_list_add_descriptor_heap(list, descriptor_heap); -+ - bindings->descriptor_tables[index] = desc; - bindings->descriptor_table_dirty_mask |= (uint64_t)1 << index; - bindings->descriptor_table_active_mask |= (uint64_t)1 << index; - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", - iface, root_parameter_index, base_descriptor.ptr); -@@ -4393,10 +4499,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(I - root_parameter_index, base_descriptor); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE 
d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, base_descriptor %#"PRIx64".\n", - iface, root_parameter_index, base_descriptor.ptr); -@@ -4418,10 +4524,10 @@ static void d3d12_command_list_set_root_constants(struct d3d12_command_list *lis - c->stage_flags, c->offset + offset * sizeof(uint32_t), count * sizeof(uint32_t), data)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, UINT data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", - iface, root_parameter_index, data, dst_offset); -@@ -4430,10 +4536,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3 - root_parameter_index, dst_offset, 1, &data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, UINT data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", - iface, root_parameter_index, data, dst_offset); -@@ -4442,10 
+4548,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID - root_parameter_index, dst_offset, 1, &data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", - iface, root_parameter_index, constant_count, data, dst_offset); -@@ -4454,10 +4560,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID - root_parameter_index, dst_offset, constant_count, data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList3 *iface, - UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", - iface, root_parameter_index, constant_count, data, dst_offset); -@@ -4481,11 +4587,20 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, - root_parameter = root_signature_get_root_descriptor(root_signature, index); - assert(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); - -- resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address); -- buffer_info.buffer = 
resource->u.vk_buffer; -- buffer_info.offset = gpu_address - resource->gpu_address; -- buffer_info.range = resource->desc.Width - buffer_info.offset; -- buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange); -+ if (gpu_address) -+ { -+ resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address); -+ buffer_info.buffer = resource->u.vk_buffer; -+ buffer_info.offset = gpu_address - resource->gpu_address; -+ buffer_info.range = resource->desc.Width - buffer_info.offset; -+ buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange); -+ } -+ else -+ { -+ buffer_info.buffer = list->device->null_resources.vk_buffer; -+ buffer_info.offset = 0; -+ buffer_info.range = VK_WHOLE_SIZE; -+ } - - if (vk_info->KHR_push_descriptor) - { -@@ -4510,9 +4625,9 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4521,9 +4636,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferVie - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4547,13 +4662,13 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li - assert(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); - - /* FIXME: Re-use buffer views. */ -- if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, &vk_buffer_view)) -+ if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, root_parameter->parameter_type, &vk_buffer_view)) - { - ERR("Failed to create buffer view.\n"); - return; - } - -- if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) -+ if (vk_buffer_view && !(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) - { - ERR("Failed to add buffer view.\n"); - VK_CALL(vkDestroyBufferView(vk_device, vk_buffer_view, NULL)); -@@ -4582,9 +4697,9 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4594,9 +4709,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceVie - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, 
D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4606,9 +4721,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceVi - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4618,9 +4733,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessVi - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView( -- ID3D12GraphicsCommandList2 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList3 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4629,10 +4744,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessV - root_parameter_index, address); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE 
d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList3 *iface, - const D3D12_INDEX_BUFFER_VIEW *view) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_vk_device_procs *vk_procs; - struct d3d12_resource *resource; - enum VkIndexType index_type; -@@ -4644,6 +4759,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics - WARN("Ignoring NULL index buffer view.\n"); - return; - } -+ if (!view->BufferLocation) -+ { -+ WARN("Ignoring index buffer location 0.\n"); -+ return; -+ } - - vk_procs = &list->device->vk_procs; - -@@ -4667,10 +4787,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics - view->BufferLocation - resource->gpu_address, index_type)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList3 *iface, - UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct vkd3d_null_resources *null_resources; - struct vkd3d_gpu_va_allocator *gpu_va_allocator; - VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; -@@ -4725,10 +4845,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi - d3d12_command_list_invalidate_current_pipeline(list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList3 *iface, - UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) - { -- struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; - VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; - VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; -@@ -4790,11 +4910,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsComm - VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList3 *iface, - UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, - BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct d3d12_rtv_desc *rtv_desc; - const struct d3d12_dsv_desc *dsv_desc; - VkFormat prev_dsv_format; -@@ -4844,7 +4964,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi - WARN("Failed to add view.\n"); - } - -- list->rtvs[i] = view->u.vk_image_view; -+ list->rtvs[i] = view->v.u.vk_image_view; - list->fb_width = max(list->fb_width, rtv_desc->width); - list->fb_height = max(list->fb_height, rtv_desc->height); - list->fb_layer_count = max(list->fb_layer_count, rtv_desc->layer_count); -@@ -4868,7 +4988,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi - list->dsv = VK_NULL_HANDLE; - } - -- list->dsv = view->u.vk_image_view; -+ list->dsv = view->v.u.vk_image_view; - list->fb_width = max(list->fb_width, dsv_desc->width); - list->fb_height = max(list->fb_height, dsv_desc->height); - list->fb_layer_count = 
max(list->fb_layer_count, dsv_desc->layer_count); -@@ -4960,7 +5080,7 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, - fb_desc.flags = 0; - fb_desc.renderPass = vk_render_pass; - fb_desc.attachmentCount = 1; -- fb_desc.pAttachments = &view->u.vk_image_view; -+ fb_desc.pAttachments = &view->v.u.vk_image_view; - fb_desc.width = width; - fb_desc.height = height; - fb_desc.layers = layer_count; -@@ -4995,12 +5115,12 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList3 *iface, - D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, - UINT rect_count, const D3D12_RECT *rects) - { - const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv); - struct VkAttachmentDescription attachment_desc; - struct VkAttachmentReference ds_reference; -@@ -5044,10 +5164,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12Gra - &clear_value, rect_count, rects); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList3 *iface, - D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const struct d3d12_rtv_desc *rtv_desc = 
d3d12_rtv_desc_from_cpu_handle(rtv); - struct VkAttachmentDescription attachment_desc; - struct VkAttachmentReference color_reference; -@@ -5163,13 +5283,14 @@ static void vkd3d_uav_clear_state_get_image_pipeline(const struct vkd3d_uav_clea - } - - static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, -- struct d3d12_resource *resource, struct vkd3d_view *view, const VkClearColorValue *clear_colour, -+ struct d3d12_resource *resource, struct vkd3d_view *descriptor, const VkClearColorValue *clear_colour, - unsigned int rect_count, const D3D12_RECT *rects) - { - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - unsigned int i, miplevel_idx, layer_count; - struct vkd3d_uav_clear_pipeline pipeline; - struct vkd3d_uav_clear_args clear_args; -+ const struct vkd3d_resource_view *view; - VkDescriptorImageInfo image_info; - D3D12_RECT full_rect, curr_rect; - VkWriteDescriptorSet write_set; -@@ -5181,8 +5302,9 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, - d3d12_command_list_invalidate_bindings(list, list->state); - d3d12_command_list_invalidate_root_parameters(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); - -- if (!d3d12_command_allocator_add_view(list->allocator, view)) -+ if (!d3d12_command_allocator_add_view(list->allocator, descriptor)) - WARN("Failed to add view.\n"); -+ view = &descriptor->v; - - clear_args.colour = *clear_colour; - -@@ -5290,15 +5412,16 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList3 *iface, - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, - const UINT values[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); -+ struct vkd3d_view *descriptor, *uint_view = NULL; - struct d3d12_device *device = list->device; -- struct vkd3d_view *view, *uint_view = NULL; - struct vkd3d_texture_view_desc view_desc; - const struct vkd3d_format *uint_format; -+ const struct vkd3d_resource_view *view; - struct d3d12_resource *resource_impl; - VkClearColorValue colour; - -@@ -5306,7 +5429,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID - iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); - - resource_impl = unsafe_impl_from_ID3D12Resource(resource); -- view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view_info.view; -+ if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) -+ return; -+ view = &descriptor->v; - memcpy(colour.uint32, values, sizeof(colour.uint32)); - - if (view->format->type != VKD3D_FORMAT_TYPE_UINT) -@@ -5320,8 +5445,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID - - if (d3d12_resource_is_buffer(resource_impl)) - { -- if (!vkd3d_create_buffer_view(device, resource_impl->u.vk_buffer, uint_format, -- view->info.buffer.offset, view->info.buffer.size, &uint_view)) -+ if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, -+ uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) - { - ERR("Failed to create buffer view.\n"); - return; -@@ -5337,26 +5462,27 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID - view_desc.layer_idx = view->info.texture.layer_idx; - view_desc.layer_count = view->info.texture.layer_count; - -- if (!vkd3d_create_texture_view(device, resource_impl->u.vk_image, &view_desc, &uint_view)) -+ if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, -+ &uint_view)) - { - 
ERR("Failed to create image view.\n"); - return; - } - } -- view = uint_view; -+ descriptor = uint_view; - } - -- d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); -+ d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); - - if (uint_view) - vkd3d_view_decref(uint_view, device); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList3 *iface, - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, - const float values[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *resource_impl; - VkClearColorValue colour; - struct vkd3d_view *view; -@@ -5365,22 +5491,23 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I - iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); - - resource_impl = unsafe_impl_from_ID3D12Resource(resource); -- view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view_info.view; -+ if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) -+ return; - memcpy(colour.float32, values, sizeof(colour.float32)); - - d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); - } - --static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *resource, const D3D12_DISCARD_REGION *region) - { - FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region); - } - --static void STDMETHODCALLTYPE 
d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList3 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - const struct vkd3d_vk_device_procs *vk_procs; - VkQueryControlFlags flags = 0; -@@ -5407,10 +5534,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsComman - VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList3 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - const struct vkd3d_vk_device_procs *vk_procs; - -@@ -5452,12 +5579,12 @@ static size_t get_query_stride(D3D12_QUERY_TYPE type) - return sizeof(uint64_t); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList3 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, - ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) - { - const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer); - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i, first, count; -@@ -5533,10 +5660,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12Graphics - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer); - const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -5605,19 +5732,19 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCo - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList3 *iface, - UINT metadata, const void *data, UINT size) - { - FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); - } - --static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList3 *iface, - UINT metadata, const void *data, UINT size) - { - FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); - } - --static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList2 *iface) -+static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList3 *iface) - { - FIXME("iface %p stub!\n", iface); - } 
-@@ -5626,14 +5753,14 @@ STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMEN - STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); - STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS)); - --static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList3 *iface, - ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, - UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) - { - struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature); - struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer); - struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - const D3D12_COMMAND_SIGNATURE_DESC *signature_desc; - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; -@@ -5651,6 +5778,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC - return; - } - -+ d3d12_command_signature_incref(sig_impl); -+ - signature_desc = &sig_impl->desc; - for (i = 0; i < signature_desc->NumArgumentDescs; ++i) - { -@@ -5713,6 +5842,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC - if (!d3d12_command_list_update_compute_state(list)) - { - WARN("Failed to update compute state, ignoring dispatch.\n"); -+ d3d12_command_signature_decref(sig_impl); - return; - } - -@@ -5725,9 +5855,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC - break; - } - } -+ -+ d3d12_command_signature_decref(sig_impl); - } - --static void STDMETHODCALLTYPE 
d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst_buffer, UINT64 dst_offset, - ID3D12Resource *src_buffer, UINT64 src_offset, - UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, -@@ -5740,7 +5872,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12Grap - dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); - } - --static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst_buffer, UINT64 dst_offset, - ID3D12Resource *src_buffer, UINT64 src_offset, - UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, -@@ -5753,20 +5885,20 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr - dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList3 *iface, - FLOAT min, FLOAT max) - { - FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList3 *iface, - UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) - { - FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", - iface, sample_count, pixel_count, sample_positions); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList2 *iface, -+static 
void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList3 *iface, - ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, - ID3D12Resource *src_resource, UINT src_sub_resource_idx, - D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) -@@ -5778,16 +5910,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12 - src_resource, src_sub_resource_idx, src_rect, format, mode); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList2 *iface, UINT mask) -+static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList3 *iface, UINT mask) - { - FIXME("iface %p, mask %#x stub!\n", iface, mask); - } - --static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList2 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList3 *iface, - UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters, - const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList3(iface); - struct d3d12_resource *resource; - unsigned int i; - -@@ -5800,7 +5932,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12Grap - } - } - --static const struct ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = -+static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList3 *iface, -+ ID3D12ProtectedResourceSession *protected_session) -+{ -+ FIXME("iface %p, protected_session %p stub!\n", iface, protected_session); -+} -+ -+static const struct ID3D12GraphicsCommandList3Vtbl d3d12_command_list_vtbl = - { - /* IUnknown methods */ - d3d12_command_list_QueryInterface, -@@ -5876,6 +6014,8 @@ static const struct 
ID3D12GraphicsCommandList2Vtbl d3d12_command_list_vtbl = - d3d12_command_list_SetViewInstanceMask, - /* ID3D12GraphicsCommandList2 methods */ - d3d12_command_list_WriteBufferImmediate, -+ /* ID3D12GraphicsCommandList3 methods */ -+ d3d12_command_list_SetProtectedResourceSession, - }; - - static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface) -@@ -5883,7 +6023,7 @@ static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12Comma - if (!iface) - return NULL; - assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); -- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList2_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList3_iface); - } - - static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device, -@@ -5892,7 +6032,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d - { - HRESULT hr; - -- list->ID3D12GraphicsCommandList2_iface.lpVtbl = &d3d12_command_list_vtbl; -+ list->ID3D12GraphicsCommandList3_iface.lpVtbl = &d3d12_command_list_vtbl; - list->refcount = 1; - - list->type = type; -@@ -5906,6 +6046,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d - - list->update_descriptors = device->use_vk_heaps ? 
d3d12_command_list_update_heap_descriptors - : d3d12_command_list_update_descriptors; -+ list->descriptor_heap_count = 0; - - if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) - { -@@ -5999,8 +6140,35 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if - return refcount; - } - -+static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) -+{ -+ switch (op->opcode) -+ { -+ case VKD3D_CS_OP_WAIT: -+ d3d12_fence_decref(op->u.wait.fence); -+ break; -+ -+ case VKD3D_CS_OP_SIGNAL: -+ d3d12_fence_decref(op->u.signal.fence); -+ break; -+ -+ case VKD3D_CS_OP_EXECUTE: -+ vkd3d_free(op->u.execute.buffers); -+ break; -+ -+ case VKD3D_CS_OP_UPDATE_MAPPINGS: -+ case VKD3D_CS_OP_COPY_MAPPINGS: -+ break; -+ } -+} -+ - static void d3d12_command_queue_op_array_destroy(struct d3d12_command_queue_op_array *array) - { -+ unsigned int i; -+ -+ for (i = 0; i < array->count; ++i) -+ d3d12_command_queue_destroy_op(&array->ops[i]); -+ - vkd3d_free(array->ops); - } - -@@ -6098,17 +6266,131 @@ static struct vkd3d_cs_op_data *d3d12_command_queue_op_array_require_space(struc - return &array->ops[array->count++]; - } - -+static bool clone_array_parameter(void **dst, const void *src, size_t elem_size, unsigned int count) -+{ -+ void *buffer; -+ -+ *dst = NULL; -+ if (src) -+ { -+ if (!(buffer = vkd3d_calloc(count, elem_size))) -+ return false; -+ memcpy(buffer, src, count * elem_size); -+ *dst = buffer; -+ } -+ return true; -+} -+ -+static void update_mappings_cleanup(struct vkd3d_cs_update_mappings *update_mappings) -+{ -+ vkd3d_free(update_mappings->region_start_coordinates); -+ vkd3d_free(update_mappings->region_sizes); -+ vkd3d_free(update_mappings->range_flags); -+ vkd3d_free(update_mappings->heap_range_offsets); -+ vkd3d_free(update_mappings->range_tile_counts); -+} -+ - static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, - ID3D12Resource *resource, UINT region_count, 
- const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, const D3D12_TILE_REGION_SIZE *region_sizes, - ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, - const UINT *heap_range_offsets, const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) - { -- FIXME("iface %p, resource %p, region_count %u, region_start_coordinates %p, " -+ struct d3d12_resource *resource_impl = unsafe_impl_from_ID3D12Resource(resource); -+ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); -+ struct d3d12_heap *heap_impl = unsafe_impl_from_ID3D12Heap(heap); -+ struct vkd3d_cs_update_mappings update_mappings = {0}; -+ struct vkd3d_cs_op_data *op; -+ -+ TRACE("iface %p, resource %p, region_count %u, region_start_coordinates %p, " - "region_sizes %p, heap %p, range_count %u, range_flags %p, heap_range_offsets %p, " -- "range_tile_counts %p, flags %#x stub!\n", -+ "range_tile_counts %p, flags %#x.\n", - iface, resource, region_count, region_start_coordinates, region_sizes, heap, range_count, - range_flags, heap_range_offsets, range_tile_counts, flags); -+ -+ if (!region_count || !range_count) -+ return; -+ -+ if (!command_queue->supports_sparse_binding) -+ { -+ FIXME("Command queue %p does not support sparse binding.\n", command_queue); -+ return; -+ } -+ -+ if (!resource_impl->tiles.subresource_count) -+ { -+ WARN("Resource %p is not a tiled resource.\n", resource_impl); -+ return; -+ } -+ -+ if (region_count > 1 && !region_start_coordinates) -+ { -+ WARN("Region start coordinates must not be NULL when region count is > 1.\n"); -+ return; -+ } -+ -+ if (range_count > 1 && !range_tile_counts) -+ { -+ WARN("Range tile counts must not be NULL when range count is > 1.\n"); -+ return; -+ } -+ -+ update_mappings.resource = resource_impl; -+ update_mappings.heap = heap_impl; -+ if (!clone_array_parameter((void **)&update_mappings.region_start_coordinates, -+ region_start_coordinates, sizeof(*region_start_coordinates), 
region_count)) -+ { -+ ERR("Failed to allocate region start coordinates.\n"); -+ return; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.region_sizes, -+ region_sizes, sizeof(*region_sizes), region_count)) -+ { -+ ERR("Failed to allocate region sizes.\n"); -+ goto free_clones; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.range_flags, -+ range_flags, sizeof(*range_flags), range_count)) -+ { -+ ERR("Failed to allocate range flags.\n"); -+ goto free_clones; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.heap_range_offsets, -+ heap_range_offsets, sizeof(*heap_range_offsets), range_count)) -+ { -+ ERR("Failed to allocate heap range offsets.\n"); -+ goto free_clones; -+ } -+ if (!clone_array_parameter((void **)&update_mappings.range_tile_counts, -+ range_tile_counts, sizeof(*range_tile_counts), range_count)) -+ { -+ ERR("Failed to allocate range tile counts.\n"); -+ goto free_clones; -+ } -+ update_mappings.region_count = region_count; -+ update_mappings.range_count = range_count; -+ update_mappings.flags = flags; -+ -+ vkd3d_mutex_lock(&command_queue->op_mutex); -+ -+ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) -+ { -+ ERR("Failed to add op.\n"); -+ goto unlock_mutex; -+ } -+ -+ op->opcode = VKD3D_CS_OP_UPDATE_MAPPINGS; -+ op->u.update_mappings = update_mappings; -+ -+ d3d12_command_queue_submit_locked(command_queue); -+ -+ vkd3d_mutex_unlock(&command_queue->op_mutex); -+ return; -+ -+unlock_mutex: -+ vkd3d_mutex_unlock(&command_queue->op_mutex); -+free_clones: -+ update_mappings_cleanup(&update_mappings); - } - - static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, -@@ -6119,10 +6401,34 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command - const D3D12_TILE_REGION_SIZE *region_size, - D3D12_TILE_MAPPING_FLAGS flags) - { -- FIXME("iface %p, dst_resource %p, dst_region_start_coordinate %p, " -- "src_resource %p, 
src_region_start_coordinate %p, region_size %p, flags %#x stub!\n", -+ struct d3d12_resource *dst_resource_impl = impl_from_ID3D12Resource(dst_resource); -+ struct d3d12_resource *src_resource_impl = impl_from_ID3D12Resource(src_resource); -+ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); -+ struct vkd3d_cs_op_data *op; -+ -+ TRACE("iface %p, dst_resource %p, dst_region_start_coordinate %p, " -+ "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x.\n", - iface, dst_resource, dst_region_start_coordinate, src_resource, - src_region_start_coordinate, region_size, flags); -+ -+ vkd3d_mutex_lock(&command_queue->op_mutex); -+ -+ if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) -+ { -+ ERR("Failed to add op.\n"); -+ return; -+ } -+ op->opcode = VKD3D_CS_OP_COPY_MAPPINGS; -+ op->u.copy_mappings.dst_resource = dst_resource_impl; -+ op->u.copy_mappings.src_resource = src_resource_impl; -+ op->u.copy_mappings.dst_region_start_coordinate = *dst_region_start_coordinate; -+ op->u.copy_mappings.src_region_start_coordinate = *src_region_start_coordinate; -+ op->u.copy_mappings.region_size = *region_size; -+ op->u.copy_mappings.flags = flags; -+ -+ d3d12_command_queue_submit_locked(command_queue); -+ -+ vkd3d_mutex_unlock(&command_queue->op_mutex); - } - - static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, -@@ -6150,8 +6456,6 @@ static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queu - ERR("Failed to submit queue(s), vr %d.\n", vr); - - vkd3d_queue_release(vkd3d_queue); -- -- vkd3d_free(buffers); - } - - static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue) -@@ -6199,6 +6503,8 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm - return; - } - -+ command_list_flush_vk_heap_updates(cmd_list); -+ - buffers[i] = cmd_list->vk_command_buffer; - } - -@@ -6207,7 +6513,7 @@ static void 
STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm - if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) - { - ERR("Failed to add op.\n"); -- return; -+ goto done; - } - op->opcode = VKD3D_CS_OP_EXECUTE; - op->u.execute.buffers = buffers; -@@ -6215,6 +6521,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm - - d3d12_command_queue_submit_locked(command_queue); - -+done: - vkd3d_mutex_unlock(&command_queue->op_mutex); - return; - } -@@ -6282,6 +6589,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * - - if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) - { -+ ERR("Failed to add op.\n"); - hr = E_OUTOFMEMORY; - goto done; - } -@@ -6620,6 +6928,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if - - if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) - { -+ ERR("Failed to add op.\n"); - hr = E_OUTOFMEMORY; - goto done; - } -@@ -6856,22 +7165,31 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * - return d3d12_command_queue_fixup_after_flush_locked(queue); - } - d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); -- d3d12_fence_decref(fence); - break; - - case VKD3D_CS_OP_SIGNAL: - d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); -- d3d12_fence_decref(op->u.signal.fence); - break; - - case VKD3D_CS_OP_EXECUTE: - d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); - break; - -+ case VKD3D_CS_OP_UPDATE_MAPPINGS: -+ FIXME("Tiled resource binding is not supported yet.\n"); -+ update_mappings_cleanup(&op->u.update_mappings); -+ break; -+ -+ case VKD3D_CS_OP_COPY_MAPPINGS: -+ FIXME("Tiled resource mapping copying is not supported yet.\n"); -+ break; -+ - default: - vkd3d_unreachable(); - } - -+ d3d12_command_queue_destroy_op(op); -+ - *flushed_any |= true; - } 
- -@@ -6934,6 +7252,8 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, - if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue->vkd3d_queue, device))) - goto fail_destroy_op_mutex; - -+ queue->supports_sparse_binding = !!(queue->vkd3d_queue->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT); -+ - d3d12_device_add_ref(queue->device = device); - - return S_OK; -@@ -7039,16 +7359,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_signature_Release(ID3D12CommandSign - TRACE("%p decreasing refcount to %u.\n", signature, refcount); - - if (!refcount) -- { -- struct d3d12_device *device = signature->device; -- -- vkd3d_private_store_destroy(&signature->private_store); -- -- vkd3d_free((void *)signature->desc.pArgumentDescs); -- vkd3d_free(signature); -- -- d3d12_device_release(device); -- } -+ d3d12_command_signature_decref(signature); - - return refcount; - } -@@ -7155,6 +7466,7 @@ HRESULT d3d12_command_signature_create(struct d3d12_device *device, const D3D12_ - - object->ID3D12CommandSignature_iface.lpVtbl = &d3d12_command_signature_vtbl; - object->refcount = 1; -+ object->internal_refcount = 1; - - object->desc = *desc; - if (!(object->desc.pArgumentDescs = vkd3d_calloc(desc->NumArgumentDescs, sizeof(*desc->pArgumentDescs)))) -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 39a5ca013c7..c33061073a3 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -19,6 +19,8 @@ - #include "vkd3d_private.h" - #include "vkd3d_version.h" - -+#define VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE 256u -+ - struct vkd3d_struct - { - enum vkd3d_structure_type type; -@@ -1462,6 +1464,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - - vulkan_info->device_limits = physical_device_info->properties2.properties.limits; - vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; -+ vulkan_info->sparse_binding = 
features->sparseBinding; -+ vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; - vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; - vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; - vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat; -@@ -2393,9 +2397,23 @@ static void vkd3d_time_domains_init(struct d3d12_device *device) - WARN("Found no acceptable host time domain. Calibrated timestamps will not be available.\n"); - } - --static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, -- const struct vkd3d_device_descriptor_limits *limits) -+static void device_init_descriptor_pool_sizes(struct d3d12_device *device) - { -+ const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; -+ VkDescriptorPoolSize *pool_sizes = device->vk_pool_sizes; -+ -+ if (device->use_vk_heaps) -+ { -+ pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -+ pool_sizes[0].descriptorCount = min(limits->storage_image_max_descriptors, -+ VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); -+ pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; -+ pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; -+ device->vk_pool_count = 2; -+ return; -+ } -+ -+ assert(ARRAY_SIZE(device->vk_pool_sizes) >= 6); - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, - VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); -@@ -2412,20 +2430,44 @@ static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, - pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER; - pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors, - VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); -+ device->vk_pool_count = 6; - }; - -+static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache 
*cache, size_t size) -+{ -+ memset(cache, 0, sizeof(*cache)); -+ cache->size = size; -+} -+ -+static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache) -+{ -+ union d3d12_desc_object u; -+ unsigned int i; -+ void *next; -+ -+ for (i = 0; i < ARRAY_SIZE(cache->heads); ++i) -+ { -+ for (u.object = cache->heads[i].head; u.object; u.object = next) -+ { -+ next = u.header->next; -+ vkd3d_free(u.object); -+ } -+ } -+} -+ - /* ID3D12Device */ --static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) -+static inline struct d3d12_device *impl_from_ID3D12Device1(ID3D12Device1 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device1_iface); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device1 *iface, - REFIID riid, void **object) - { - TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); - -- if (IsEqualGUID(riid, &IID_ID3D12Device) -+ if (IsEqualGUID(riid, &IID_ID3D12Device1) -+ || IsEqualGUID(riid, &IID_ID3D12Device) - || IsEqualGUID(riid, &IID_ID3D12Object) - || IsEqualGUID(riid, &IID_IUnknown)) - { -@@ -2440,9 +2482,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device *iface - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface) -+static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device1 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - ULONG refcount = InterlockedIncrement(&device->refcount); - - TRACE("%p increasing refcount to %u.\n", device, refcount); -@@ -2450,11 +2492,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device *iface) - return refcount; - } - --static ULONG STDMETHODCALLTYPE 
d3d12_device_Release(ID3D12Device *iface) -+static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device1 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - ULONG refcount = InterlockedDecrement(&device->refcount); -- size_t i; - - TRACE("%p decreasing refcount to %u.\n", device, refcount); - -@@ -2474,8 +2515,8 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) - vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device); - d3d12_device_destroy_pipeline_cache(device); - d3d12_device_destroy_vkd3d_queues(device); -- for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) -- vkd3d_mutex_destroy(&device->desc_mutex[i]); -+ vkd3d_desc_object_cache_cleanup(&device->view_desc_cache); -+ vkd3d_desc_object_cache_cleanup(&device->cbuffer_desc_cache); - VK_CALL(vkDestroyDevice(device->vk_device, NULL)); - if (device->parent) - IUnknown_Release(device->parent); -@@ -2487,10 +2528,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) - return refcount; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device1 *iface, - REFGUID guid, UINT *data_size, void *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -2498,10 +2539,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device *iface - return vkd3d_get_private_data(&device->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device1 *iface, - REFGUID guid, UINT data_size, const void *data) - { -- struct 
d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", - iface, debugstr_guid(guid), data_size, data); -@@ -2509,19 +2550,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device *iface - return vkd3d_set_private_data(&device->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device1 *iface, - REFGUID guid, const IUnknown *data) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&device->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device1 *iface, const WCHAR *name) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size)); - -@@ -2529,17 +2570,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device *iface, const - VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); - } - --static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device *iface) -+static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device1 *iface) - { - TRACE("iface %p.\n", iface); - - return 1; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device1 *iface, - const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue) - { -- struct 
d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_command_queue *object; - HRESULT hr; - -@@ -2553,10 +2594,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device *i - riid, command_queue); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device1 *iface, - D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_command_allocator *object; - HRESULT hr; - -@@ -2570,10 +2611,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic - riid, command_allocator); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device1 *iface, - const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_pipeline_state *object; - HRESULT hr; - -@@ -2587,10 +2628,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 - &IID_ID3D12PipelineState, riid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device1 *iface, - const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_pipeline_state *object; - HRESULT hr; - -@@ 
-2604,11 +2645,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D - &IID_ID3D12PipelineState, riid, pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device1 *iface, - UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator, - ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_command_list *object; - HRESULT hr; - -@@ -2621,8 +2662,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device *if - initial_pipeline_state, &object))) - return hr; - -- return return_interface(&object->ID3D12GraphicsCommandList2_iface, -- &IID_ID3D12GraphicsCommandList2, riid, command_list); -+ return return_interface(&object->ID3D12GraphicsCommandList3_iface, -+ &IID_ID3D12GraphicsCommandList3, riid, command_list); - } - - /* Direct3D feature levels restrict which formats can be optionally supported. 
*/ -@@ -2731,10 +2772,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) - return true; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device1 *iface, - D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", - iface, feature, feature_data, feature_data_size); -@@ -3233,10 +3274,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device * - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device1 *iface, - const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_descriptor_heap *object; - HRESULT hr; - -@@ -3250,7 +3291,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device - &IID_ID3D12DescriptorHeap, riid, descriptor_heap); - } - --static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device *iface, -+static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device1 *iface, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) - { - TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); -@@ -3273,11 +3314,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device1 *iface, - UINT 
node_mask, const void *bytecode, SIZE_T bytecode_length, - REFIID riid, void **root_signature) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_root_signature *object; - HRESULT hr; - -@@ -3293,10 +3334,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device * - &IID_ID3D12RootSignature, riid, root_signature); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device1 *iface, - const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); -@@ -3305,11 +3346,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device1 *iface, - ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", -@@ -3319,11 +3360,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device *iface, -+static void STDMETHODCALLTYPE 
d3d12_device_CreateUnorderedAccessView(ID3D12Device1 *iface, - ID3D12Resource *resource, ID3D12Resource *counter_resource, - const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %#lx.\n", -@@ -3334,7 +3375,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device1 *iface, - ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -@@ -3342,10 +3383,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device * - iface, resource, desc, descriptor.ptr); - - d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), -- impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); -+ impl_from_ID3D12Device1(iface), unsafe_impl_from_ID3D12Resource(resource), desc); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device1 *iface, - ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, - D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -@@ -3353,13 +3394,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device * - iface, resource, desc, descriptor.ptr); - - d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), -- impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); -+ impl_from_ID3D12Device1(iface), 
unsafe_impl_from_ID3D12Resource(resource), desc); - } - --static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device1 *iface, - const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_desc tmp = {0}; - - TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); -@@ -3368,142 +3409,17 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); - } - --static void flush_desc_writes(struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], -- struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) --{ -- enum vkd3d_vk_descriptor_set_index set; -- for (set = 0; set < VKD3D_SET_INDEX_COUNT; ++set) -- { -- if (!infos[set].count) -- continue; -- d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); -- infos[set].count = 0; -- infos[set].uav_counter = false; -- } --} -- --static void d3d12_desc_buffered_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, -- struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], -- struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) --{ -- struct d3d12_desc_copy_location *location; -- enum vkd3d_vk_descriptor_set_index set; -- struct vkd3d_mutex *mutex; -- -- mutex = d3d12_device_get_descriptor_mutex(device, src); -- vkd3d_mutex_lock(mutex); -- -- if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_FREE) -- { -- /* Source must be unlocked first, and therefore can't be used as a null source. 
*/ -- static const struct d3d12_desc null = {0}; -- vkd3d_mutex_unlock(mutex); -- d3d12_desc_write_atomic(dst, &null, device); -- return; -- } -- -- set = vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(src->s.vk_descriptor_type); -- location = &locations[set][infos[set].count++]; -- -- location->src.s = src->s; -- -- if (location->src.s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) -- vkd3d_view_incref(location->src.s.u.view_info.view); -- -- vkd3d_mutex_unlock(mutex); -- -- infos[set].uav_counter |= (location->src.s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV) -- && !!location->src.s.u.view_info.view->vk_counter_view; -- location->dst = dst; -- -- if (infos[set].count == ARRAY_SIZE(locations[0])) -- { -- d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); -- infos[set].count = 0; -- infos[set].uav_counter = false; -- } --} -- --/* Some games, e.g. Control, copy a large number of descriptors per frame, so the -- * speed of this function is critical. */ --static void d3d12_device_vk_heaps_copy_descriptors(struct d3d12_device *device, -- UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, -- const UINT *dst_descriptor_range_sizes, -- UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, -- const UINT *src_descriptor_range_sizes) --{ -- struct d3d12_desc_copy_location locations[VKD3D_SET_INDEX_COUNT][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; -- unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; -- /* The locations array is relatively large, and often mostly empty. Keeping these -- * values together in a separate array will likely result in fewer cache misses. 
*/ -- struct d3d12_desc_copy_info infos[VKD3D_SET_INDEX_COUNT]; -- struct d3d12_descriptor_heap *descriptor_heap = NULL; -- const struct d3d12_desc *src, *heap_base, *heap_end; -- unsigned int dst_range_size, src_range_size; -- struct d3d12_desc *dst; -- -- descriptor_heap = d3d12_desc_get_descriptor_heap(d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[0])); -- heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; -- heap_end = heap_base + descriptor_heap->desc.NumDescriptors; -- -- memset(infos, 0, sizeof(infos)); -- dst_range_idx = dst_idx = 0; -- src_range_idx = src_idx = 0; -- while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) -- { -- dst_range_size = dst_descriptor_range_sizes ? dst_descriptor_range_sizes[dst_range_idx] : 1; -- src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1; -- -- dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); -- src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); -- -- if (dst < heap_base || dst >= heap_end) -- { -- flush_desc_writes(locations, infos, descriptor_heap, device); -- descriptor_heap = d3d12_desc_get_descriptor_heap(dst); -- heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; -- heap_end = heap_base + descriptor_heap->desc.NumDescriptors; -- } -- -- for (; dst_idx < dst_range_size && src_idx < src_range_size; src_idx++, dst_idx++) -- { -- /* We don't need to lock either descriptor for the identity check. The descriptor -- * mutex is only intended to prevent use-after-free of the vkd3d_view caused by a -- * race condition in the calling app. It is unnecessary to protect this test as it's -- * the app's race condition, not ours. 
*/ -- if (dst[dst_idx].s.magic == src[src_idx].s.magic && (dst[dst_idx].s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) -- && dst[dst_idx].s.u.view_info.written_serial_id == src[src_idx].s.u.view_info.view->serial_id) -- continue; -- d3d12_desc_buffered_copy_atomic(&dst[dst_idx], &src[src_idx], locations, infos, descriptor_heap, device); -- } -- -- if (dst_idx >= dst_range_size) -- { -- ++dst_range_idx; -- dst_idx = 0; -- } -- if (src_idx >= src_range_size) -- { -- ++src_range_idx; -- src_idx = 0; -- } -- } -- -- flush_desc_writes(locations, infos, descriptor_heap, device); --} -- --#define VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT 8 -- --static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device1 *iface, - UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, - const UINT *dst_descriptor_range_sizes, - UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, - const UINT *src_descriptor_range_sizes, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; - unsigned int dst_range_size, src_range_size; -+ struct d3d12_descriptor_heap *dst_heap; - const struct d3d12_desc *src; - struct d3d12_desc *dst; - -@@ -3525,15 +3441,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, - if (!dst_descriptor_range_count) - return; - -- if (device->use_vk_heaps && (dst_descriptor_range_count > 1 || (dst_descriptor_range_sizes -- && dst_descriptor_range_sizes[0] >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT))) -- { -- d3d12_device_vk_heaps_copy_descriptors(device, dst_descriptor_range_count, dst_descriptor_range_offsets, -- dst_descriptor_range_sizes, src_descriptor_range_count, 
src_descriptor_range_offsets, -- src_descriptor_range_sizes); -- return; -- } -- - dst_range_idx = dst_idx = 0; - src_range_idx = src_idx = 0; - while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) -@@ -3542,10 +3449,15 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, - src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1; - - dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); -+ dst_heap = d3d12_desc_get_descriptor_heap(dst); - src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); - -- while (dst_idx < dst_range_size && src_idx < src_range_size) -- d3d12_desc_copy(&dst[dst_idx++], &src[src_idx++], device); -+ for (; dst_idx < dst_range_size && src_idx < src_range_size; ++dst_idx, ++src_idx) -+ { -+ if (dst[dst_idx].s.u.object == src[src_idx].s.u.object) -+ continue; -+ d3d12_desc_copy(&dst[dst_idx], &src[src_idx], dst_heap, device); -+ } - - if (dst_idx >= dst_range_size) - { -@@ -3560,7 +3472,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, - } - } - --static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device1 *iface, - UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, - const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) -@@ -3570,26 +3482,15 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i - iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, - descriptor_heap_type); - -- if (descriptor_count >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT) -- { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -- if (device->use_vk_heaps) -- { -- 
d3d12_device_vk_heaps_copy_descriptors(device, 1, &dst_descriptor_range_offset, -- &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count); -- return; -- } -- } -- - d3d12_device_CopyDescriptors(iface, 1, &dst_descriptor_range_offset, &descriptor_count, - 1, &src_descriptor_range_offset, &descriptor_count, descriptor_heap_type); - } - - static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( -- ID3D12Device *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, -+ ID3D12Device1 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, - UINT count, const D3D12_RESOURCE_DESC *resource_descs) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - const D3D12_RESOURCE_DESC *desc; - uint64_t requested_alignment; - -@@ -3662,10 +3563,10 @@ invalid: - return info; - } - --static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device *iface, -+static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device1 *iface, - D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - bool coherent; - - TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", -@@ -3705,12 +3606,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope - return heap_properties; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device1 *iface, - const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, 
void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_resource *object; - HRESULT hr; - -@@ -3729,10 +3630,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi - return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device1 *iface, - const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_heap *object; - HRESULT hr; - -@@ -3748,12 +3649,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device *iface, - return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device1 *iface, - ID3D12Heap *heap, UINT64 heap_offset, - const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_heap *heap_object; - struct d3d12_resource *object; - HRESULT hr; -@@ -3772,11 +3673,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device - return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device1 *iface, - const D3D12_RESOURCE_DESC 
*desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_resource *object; - HRESULT hr; - -@@ -3790,11 +3691,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic - return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device1 *iface, - ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, - const WCHAR *name, HANDLE *handle) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", - iface, object, attributes, access, debugstr_w(name, device->wchar_size), handle); -@@ -3802,7 +3703,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device *i - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device1 *iface, - HANDLE handle, REFIID riid, void **object) - { - FIXME("iface %p, handle %p, riid %s, object %p stub!\n", -@@ -3811,10 +3712,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device *ifa - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device1 *iface, - const WCHAR *name, DWORD access, HANDLE *handle) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = 
impl_from_ID3D12Device1(iface); - - FIXME("iface %p, name %s, access %#x, handle %p stub!\n", - iface, debugstr_w(name, device->wchar_size), access, handle); -@@ -3822,7 +3723,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic - return E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device1 *iface, - UINT object_count, ID3D12Pageable * const *objects) - { - FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", -@@ -3831,7 +3732,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device *iface, - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device1 *iface, - UINT object_count, ID3D12Pageable * const *objects) - { - FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", -@@ -3840,10 +3741,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device *iface, - return S_OK; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device1 *iface, - UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_fence *object; - HRESULT hr; - -@@ -3853,24 +3754,24 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device *iface, - if (FAILED(hr = d3d12_fence_create(device, initial_value, flags, &object))) - return hr; - -- return return_interface(&object->ID3D12Fence_iface, &IID_ID3D12Fence, riid, fence); -+ return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device *iface) -+static HRESULT 
STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device1 *iface) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p.\n", iface); - - return device->removed_reason; - } - --static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device1 *iface, - const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, - UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, - UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; - unsigned int width, height, depth, plane_count, sub_resources_per_plane; -@@ -3950,10 +3851,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *i - *total_bytes = total; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device1 *iface, - const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_query_heap *object; - HRESULT hr; - -@@ -3966,18 +3867,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device *ifac - return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device *iface, BOOL enable) -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device1 *iface, BOOL enable) - { - FIXME("iface %p, enable %#x stub!\n", iface, enable); - - return 
E_NOTIMPL; - } - --static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device1 *iface, - const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, - REFIID iid, void **command_signature) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - struct d3d12_command_signature *object; - HRESULT hr; - -@@ -3991,23 +3892,29 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic - &IID_ID3D12CommandSignature, iid, command_signature); - } - --static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device *iface, -+static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device1 *iface, - ID3D12Resource *resource, UINT *total_tile_count, - D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, - UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, - D3D12_SUBRESOURCE_TILING *sub_resource_tilings) - { -- FIXME("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " -+ const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); -+ -+ TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " - "standard_title_shape %p, sub_resource_tiling_count %p, " -- "first_sub_resource_tiling %u, sub_resource_tilings %p stub!\n", -+ "first_sub_resource_tiling %u, sub_resource_tilings %p.\n", - iface, resource, total_tile_count, packed_mip_info, standard_tile_shape, - sub_resource_tiling_count, first_sub_resource_tiling, - sub_resource_tilings); -+ -+ d3d12_resource_get_tiling(device, resource_impl, total_tile_count, packed_mip_info, standard_tile_shape, -+ sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); - } - --static LUID * STDMETHODCALLTYPE 
d3d12_device_GetAdapterLuid(ID3D12Device *iface, LUID *luid) -+static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device1 *iface, LUID *luid) - { -- struct d3d12_device *device = impl_from_ID3D12Device(iface); -+ struct d3d12_device *device = impl_from_ID3D12Device1(iface); - - TRACE("iface %p, luid %p.\n", iface, luid); - -@@ -4016,7 +3923,33 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device *iface, - return luid; - } - --static const struct ID3D12DeviceVtbl d3d12_device_vtbl = -+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device1 *iface, -+ const void *blob, SIZE_T blob_size, REFIID iid, void **lib) -+{ -+ FIXME("iface %p, blob %p, blob_size %lu, iid %s, lib %p stub!\n", iface, blob, blob_size, debugstr_guid(iid), lib); -+ -+ return DXGI_ERROR_UNSUPPORTED; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device1 *iface, -+ ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, -+ D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) -+{ -+ FIXME("iface %p, fences %p, values %p, fence_count %u, flags %#x, event %p stub!\n", -+ iface, fences, values, fence_count, flags, event); -+ -+ return E_NOTIMPL; -+} -+ -+static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device1 *iface, -+ UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) -+{ -+ FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); -+ -+ return S_OK; -+} -+ -+static const struct ID3D12Device1Vtbl d3d12_device_vtbl = - { - /* IUnknown methods */ - d3d12_device_QueryInterface, -@@ -4065,14 +3998,18 @@ static const struct ID3D12DeviceVtbl d3d12_device_vtbl = - d3d12_device_CreateCommandSignature, - d3d12_device_GetResourceTiling, - d3d12_device_GetAdapterLuid, -+ /* ID3D12Device1 methods */ -+ d3d12_device_CreatePipelineLibrary, -+ 
d3d12_device_SetEventOnMultipleFenceCompletion, -+ d3d12_device_SetResidencyPriority, - }; - --struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface) -+struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface) - { - if (!iface) - return NULL; - assert(iface->lpVtbl == &d3d12_device_vtbl); -- return impl_from_ID3D12Device(iface); -+ return impl_from_ID3D12Device1(iface); - } - - static HRESULT d3d12_device_init(struct d3d12_device *device, -@@ -4080,9 +4017,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, - { - const struct vkd3d_vk_device_procs *vk_procs; - HRESULT hr; -- size_t i; - -- device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; -+ device->ID3D12Device1_iface.lpVtbl = &d3d12_device_vtbl; - device->refcount = 1; - - vkd3d_instance_incref(device->vkd3d_instance = instance); -@@ -4123,10 +4059,10 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, - device->blocked_queue_count = 0; - vkd3d_mutex_init(&device->blocked_queues_mutex); - -- for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) -- vkd3d_mutex_init(&device->desc_mutex[i]); -+ vkd3d_desc_object_cache_init(&device->view_desc_cache, sizeof(struct vkd3d_view)); -+ vkd3d_desc_object_cache_init(&device->cbuffer_desc_cache, sizeof(struct vkd3d_cbuffer_desc)); - -- vkd3d_init_descriptor_pool_sizes(device->vk_pool_sizes, &device->vk_info.descriptor_limits); -+ device_init_descriptor_pool_sizes(device); - - if ((device->parent = create_info->parent)) - IUnknown_AddRef(device->parent); -@@ -4279,28 +4215,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha - - IUnknown *vkd3d_get_device_parent(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); - - return d3d12_device->parent; - } - - VkDevice vkd3d_get_vk_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = 
impl_from_ID3D12Device(device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); - - return d3d12_device->vk_device; - } - - VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); - - return d3d12_device->vk_physical_device; - } - - struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) - { -- struct d3d12_device *d3d12_device = impl_from_ID3D12Device(device); -+ struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); - - return d3d12_device->vkd3d_instance; - } -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index 8c050cfeb32..f3842958d96 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -326,6 +326,9 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap) - - vkd3d_private_store_destroy(&heap->private_store); - -+ if (heap->map_ptr) -+ VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory)); -+ - VK_CALL(vkFreeMemory(device->vk_device, heap->vk_memory, NULL)); - - vkd3d_mutex_destroy(&heap->mutex); -@@ -346,12 +349,19 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_Release(ID3D12Heap *iface) - - TRACE("%p decreasing refcount to %u.\n", heap, refcount); - -- if (!refcount) -+ /* A heap must not be destroyed until all contained resources are destroyed. 
*/ -+ if (!refcount && !heap->resource_count) - d3d12_heap_destroy(heap); - - return refcount; - } - -+static void d3d12_heap_resource_destroyed(struct d3d12_heap *heap) -+{ -+ if (!InterlockedDecrement(&heap->resource_count) && (!heap->refcount || heap->is_private)) -+ d3d12_heap_destroy(heap); -+} -+ - static HRESULT STDMETHODCALLTYPE d3d12_heap_GetPrivateData(ID3D12Heap *iface, - REFGUID guid, UINT *data_size, void *data) - { -@@ -437,97 +447,6 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) - return impl_from_ID3D12Heap(iface); - } - --static HRESULT d3d12_heap_map(struct d3d12_heap *heap, uint64_t offset, -- struct d3d12_resource *resource, void **data) --{ -- struct d3d12_device *device = heap->device; -- HRESULT hr = S_OK; -- VkResult vr; -- -- vkd3d_mutex_lock(&heap->mutex); -- -- assert(!resource->map_count || heap->map_ptr); -- -- if (!resource->map_count) -- { -- if (!heap->map_ptr) -- { -- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -- -- TRACE("Mapping heap %p.\n", heap); -- -- assert(!heap->map_count); -- -- if ((vr = VK_CALL(vkMapMemory(device->vk_device, heap->vk_memory, -- 0, VK_WHOLE_SIZE, 0, &heap->map_ptr))) < 0) -- { -- WARN("Failed to map device memory, vr %d.\n", vr); -- heap->map_ptr = NULL; -- } -- -- hr = hresult_from_vk_result(vr); -- } -- -- if (heap->map_ptr) -- ++heap->map_count; -- } -- -- if (hr == S_OK) -- { -- assert(heap->map_ptr); -- if (data) -- *data = (BYTE *)heap->map_ptr + offset; -- ++resource->map_count; -- } -- else -- { -- assert(!heap->map_ptr); -- if (data) -- *data = NULL; -- } -- -- vkd3d_mutex_unlock(&heap->mutex); -- -- return hr; --} -- --static void d3d12_heap_unmap(struct d3d12_heap *heap, struct d3d12_resource *resource) --{ -- struct d3d12_device *device = heap->device; -- -- vkd3d_mutex_lock(&heap->mutex); -- -- if (!resource->map_count) -- { -- WARN("Resource %p is not mapped.\n", resource); -- goto done; -- } -- -- --resource->map_count; -- if (resource->map_count) 
-- goto done; -- -- if (!heap->map_count) -- { -- ERR("Heap %p is not mapped.\n", heap); -- goto done; -- } -- -- --heap->map_count; -- if (!heap->map_count) -- { -- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -- -- TRACE("Unmapping heap %p, ptr %p.\n", heap, heap->map_ptr); -- -- VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory)); -- heap->map_ptr = NULL; -- } -- --done: -- vkd3d_mutex_unlock(&heap->mutex); --} -- - static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) - { - if (!resource && !desc->SizeInBytes) -@@ -552,15 +471,23 @@ static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d1 - return S_OK; - } - -+static VkMemoryPropertyFlags d3d12_heap_get_memory_property_flags(const struct d3d12_heap *heap) -+{ -+ return heap->device->memory_properties.memoryTypes[heap->vk_memory_type].propertyFlags; -+} -+ - static HRESULT d3d12_heap_init(struct d3d12_heap *heap, - struct d3d12_device *device, const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) - { -+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkMemoryRequirements memory_requirements; - VkDeviceSize vk_memory_size; -+ VkResult vr; - HRESULT hr; - - heap->ID3D12Heap_iface.lpVtbl = &d3d12_heap_vtbl; - heap->refcount = 1; -+ heap->resource_count = 0; - - heap->is_private = !!resource; - -@@ -628,6 +555,20 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, - heap->device = device; - if (!heap->is_private) - d3d12_device_add_ref(heap->device); -+ else -+ heap->resource_count = 1; -+ -+ if (d3d12_heap_get_memory_property_flags(heap) & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) -+ { -+ if ((vr = VK_CALL(vkMapMemory(device->vk_device, -+ heap->vk_memory, 0, VK_WHOLE_SIZE, 0, &heap->map_ptr))) < 0) -+ { -+ heap->map_ptr = NULL; -+ ERR("Failed to map memory, vr %d.\n", vr); -+ d3d12_heap_destroy(heap); -+ return hresult_from_vk_result(hr); -+ } -+ } - - return S_OK; - } -@@ -838,6 
+779,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, - VkImageFormatListCreateInfoKHR format_list; - const struct vkd3d_format *format; - VkImageCreateInfo image_info; -+ uint32_t count; - VkResult vr; - - if (resource) -@@ -973,6 +915,20 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, - if (resource && image_info.tiling == VK_IMAGE_TILING_LINEAR) - resource->flags |= VKD3D_RESOURCE_LINEAR_TILING; - -+ if (sparse_resource) -+ { -+ count = 0; -+ VK_CALL(vkGetPhysicalDeviceSparseImageFormatProperties(device->vk_physical_device, image_info.format, -+ image_info.imageType, image_info.samples, image_info.usage, image_info.tiling, &count, NULL)); -+ -+ if (!count) -+ { -+ FIXME("Sparse images are not supported with format %u, type %u, samples %u, usage %#x.\n", -+ image_info.format, image_info.imageType, image_info.samples, image_info.usage); -+ return E_INVALIDARG; -+ } -+ } -+ - if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, vk_image))) < 0) - WARN("Failed to create Vulkan image, vr %d.\n", vr); - -@@ -987,6 +943,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - D3D12_RESOURCE_DESC validated_desc; - VkMemoryRequirements requirements; - VkImage vk_image; -+ bool tiled; - HRESULT hr; - - assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); -@@ -999,8 +956,10 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - desc = &validated_desc; - } - -+ tiled = desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; -+ - /* XXX: We have to create an image to get its memory requirements. */ -- if (SUCCEEDED(hr = vkd3d_create_image(device, &heap_properties, 0, desc, NULL, &vk_image))) -+ if (SUCCEEDED(hr = vkd3d_create_image(device, tiled ? 
NULL : &heap_properties, 0, desc, NULL, &vk_image))) - { - VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &requirements)); - VK_CALL(vkDestroyImage(device->vk_device, vk_image, NULL)); -@@ -1012,6 +971,11 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - return hr; - } - -+static void d3d12_resource_tile_info_cleanup(struct d3d12_resource *resource) -+{ -+ vkd3d_free(resource->tiles.subresources); -+} -+ - static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12_device *device) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -@@ -1027,8 +991,10 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12 - else - VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL)); - -- if (resource->flags & VKD3D_RESOURCE_DEDICATED_HEAP) -- d3d12_heap_destroy(resource->heap); -+ d3d12_resource_tile_info_cleanup(resource); -+ -+ if (resource->heap) -+ d3d12_heap_resource_destroyed(resource->heap); - } - - static ULONG d3d12_resource_incref(struct d3d12_resource *resource) -@@ -1098,12 +1064,196 @@ static void d3d12_resource_get_level_box(const struct d3d12_resource *resource, - box->back = d3d12_resource_desc_get_depth(&resource->desc, level); - } - --/* ID3D12Resource */ --static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) -+static void compute_image_subresource_size_in_tiles(const VkExtent3D *tile_extent, -+ const struct D3D12_RESOURCE_DESC *desc, unsigned int miplevel_idx, -+ struct vkd3d_tiled_region_extent *size) - { -- return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); -+ unsigned int width, height, depth; -+ -+ width = d3d12_resource_desc_get_width(desc, miplevel_idx); -+ height = d3d12_resource_desc_get_height(desc, miplevel_idx); -+ depth = d3d12_resource_desc_get_depth(desc, miplevel_idx); -+ size->width = (width + tile_extent->width - 1) / tile_extent->width; -+ size->height = 
(height + tile_extent->height - 1) / tile_extent->height; -+ size->depth = (depth + tile_extent->depth - 1) / tile_extent->depth; - } - -+void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, -+ UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, -+ UINT *subresource_tiling_count, UINT first_subresource_tiling, -+ D3D12_SUBRESOURCE_TILING *subresource_tilings) -+{ -+ unsigned int i, subresource, subresource_count, miplevel_idx, count; -+ const struct vkd3d_subresource_tile_info *tile_info; -+ const VkExtent3D *tile_extent; -+ -+ tile_extent = &resource->tiles.tile_extent; -+ -+ if (packed_mip_info) -+ { -+ packed_mip_info->NumStandardMips = resource->tiles.standard_mip_count; -+ packed_mip_info->NumPackedMips = resource->desc.MipLevels - packed_mip_info->NumStandardMips; -+ packed_mip_info->NumTilesForPackedMips = !!resource->tiles.packed_mip_tile_count; /* non-zero dummy value */ -+ packed_mip_info->StartTileIndexInOverallResource = packed_mip_info->NumPackedMips -+ ? resource->tiles.subresources[resource->tiles.standard_mip_count].offset : 0; -+ } -+ -+ if (standard_tile_shape) -+ { -+ /* D3D12 docs say tile shape is cleared to zero if there is no standard mip, but drivers don't to do this. 
*/ -+ standard_tile_shape->WidthInTexels = tile_extent->width; -+ standard_tile_shape->HeightInTexels = tile_extent->height; -+ standard_tile_shape->DepthInTexels = tile_extent->depth; -+ } -+ -+ if (total_tile_count) -+ *total_tile_count = resource->tiles.total_count; -+ -+ if (!subresource_tiling_count) -+ return; -+ -+ subresource_count = resource->tiles.subresource_count; -+ -+ count = subresource_count - min(first_subresource_tiling, subresource_count); -+ count = min(count, *subresource_tiling_count); -+ -+ for (i = 0; i < count; ++i) -+ { -+ subresource = i + first_subresource_tiling; -+ miplevel_idx = subresource % resource->desc.MipLevels; -+ if (miplevel_idx >= resource->tiles.standard_mip_count) -+ { -+ memset(&subresource_tilings[i], 0, sizeof(subresource_tilings[i])); -+ subresource_tilings[i].StartTileIndexInOverallResource = D3D12_PACKED_TILE; -+ continue; -+ } -+ -+ tile_info = &resource->tiles.subresources[subresource]; -+ subresource_tilings[i].StartTileIndexInOverallResource = tile_info->offset; -+ subresource_tilings[i].WidthInTiles = tile_info->extent.width; -+ subresource_tilings[i].HeightInTiles = tile_info->extent.height; -+ subresource_tilings[i].DepthInTiles = tile_info->extent.depth; -+ } -+ *subresource_tiling_count = i; -+} -+ -+static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3d12_device *device) -+{ -+ unsigned int i, start_idx, subresource_count, tile_count, miplevel_idx; -+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -+ VkSparseImageMemoryRequirements *sparse_requirements_array; -+ VkSparseImageMemoryRequirements sparse_requirements = {0}; -+ struct vkd3d_subresource_tile_info *tile_info; -+ VkMemoryRequirements requirements; -+ const VkExtent3D *tile_extent; -+ uint32_t requirement_count; -+ -+ subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); -+ -+ if (!(resource->tiles.subresources = vkd3d_calloc(subresource_count, 
sizeof(*resource->tiles.subresources)))) -+ { -+ ERR("Failed to allocate subresource info array.\n"); -+ return false; -+ } -+ -+ if (d3d12_resource_is_buffer(resource)) -+ { -+ assert(subresource_count == 1); -+ -+ VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, resource->u.vk_buffer, &requirements)); -+ if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) -+ FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); -+ -+ tile_info = &resource->tiles.subresources[0]; -+ tile_info->offset = 0; -+ tile_info->extent.width = align(resource->desc.Width, D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) -+ / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; -+ tile_info->extent.height = 1; -+ tile_info->extent.depth = 1; -+ tile_info->count = tile_info->extent.width; -+ -+ resource->tiles.tile_extent.width = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; -+ resource->tiles.tile_extent.height = 1; -+ resource->tiles.tile_extent.depth = 1; -+ resource->tiles.total_count = tile_info->extent.width; -+ resource->tiles.subresource_count = 1; -+ resource->tiles.standard_mip_count = 1; -+ resource->tiles.packed_mip_tile_count = 0; -+ } -+ else -+ { -+ VK_CALL(vkGetImageMemoryRequirements(device->vk_device, resource->u.vk_image, &requirements)); -+ if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) -+ FIXME("Vulkan device tile size is greater than the standard D3D12 tile size.\n"); -+ -+ requirement_count = 0; -+ VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, &requirement_count, NULL)); -+ if (!(sparse_requirements_array = vkd3d_calloc(requirement_count, sizeof(*sparse_requirements_array)))) -+ { -+ ERR("Failed to allocate sparse requirements array.\n"); -+ return false; -+ } -+ VK_CALL(vkGetImageSparseMemoryRequirements(device->vk_device, resource->u.vk_image, -+ &requirement_count, sparse_requirements_array)); -+ -+ for (i = 0; i < requirement_count; ++i) -+ { -+ if 
(sparse_requirements_array[i].formatProperties.aspectMask & resource->format->vk_aspect_mask) -+ { -+ if (sparse_requirements.formatProperties.aspectMask) -+ { -+ WARN("Ignoring properties for aspect mask %#x.\n", -+ sparse_requirements_array[i].formatProperties.aspectMask); -+ } -+ else -+ { -+ sparse_requirements = sparse_requirements_array[i]; -+ } -+ } -+ } -+ vkd3d_free(sparse_requirements_array); -+ if (!sparse_requirements.formatProperties.aspectMask) -+ { -+ WARN("Failed to get sparse requirements.\n"); -+ return false; -+ } -+ -+ resource->tiles.tile_extent = sparse_requirements.formatProperties.imageGranularity; -+ resource->tiles.subresource_count = subresource_count; -+ resource->tiles.standard_mip_count = sparse_requirements.imageMipTailSize -+ ? sparse_requirements.imageMipTailFirstLod : resource->desc.MipLevels; -+ resource->tiles.packed_mip_tile_count = (resource->tiles.standard_mip_count < resource->desc.MipLevels) -+ ? sparse_requirements.imageMipTailSize / requirements.alignment : 0; -+ -+ for (i = 0, start_idx = 0; i < subresource_count; ++i) -+ { -+ miplevel_idx = i % resource->desc.MipLevels; -+ -+ tile_extent = &sparse_requirements.formatProperties.imageGranularity; -+ tile_info = &resource->tiles.subresources[i]; -+ compute_image_subresource_size_in_tiles(tile_extent, &resource->desc, miplevel_idx, &tile_info->extent); -+ tile_info->offset = start_idx; -+ tile_info->count = 0; -+ -+ if (miplevel_idx < resource->tiles.standard_mip_count) -+ { -+ tile_count = tile_info->extent.width * tile_info->extent.height * tile_info->extent.depth; -+ start_idx += tile_count; -+ tile_info->count = tile_count; -+ } -+ else if (miplevel_idx == resource->tiles.standard_mip_count) -+ { -+ tile_info->count = 1; /* Non-zero dummy value */ -+ start_idx += 1; -+ } -+ } -+ resource->tiles.total_count = start_idx; -+ } -+ -+ return true; -+} -+ -+/* ID3D12Resource */ - static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource *iface, - REFIID 
riid, void **object) - { -@@ -1223,12 +1373,55 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_GetDevice(ID3D12Resource *iface, - return d3d12_device_query_interface(resource->device, iid, device); - } - -+static void *d3d12_resource_get_map_ptr(struct d3d12_resource *resource) -+{ -+ assert(resource->heap->map_ptr); -+ return (uint8_t *)resource->heap->map_ptr + resource->heap_offset; -+} -+ -+static void d3d12_resource_get_vk_range(struct d3d12_resource *resource, -+ uint64_t offset, uint64_t size, VkMappedMemoryRange *vk_range) -+{ -+ vk_range->sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; -+ vk_range->pNext = NULL; -+ vk_range->memory = resource->heap->vk_memory; -+ vk_range->offset = resource->heap_offset + offset; -+ vk_range->size = size; -+} -+ -+static void d3d12_resource_invalidate(struct d3d12_resource *resource, uint64_t offset, uint64_t size) -+{ -+ const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs; -+ VkMappedMemoryRange vk_range; -+ VkResult vr; -+ -+ if (d3d12_heap_get_memory_property_flags(resource->heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) -+ return; -+ -+ d3d12_resource_get_vk_range(resource, offset, size, &vk_range); -+ if ((vr = VK_CALL(vkInvalidateMappedMemoryRanges(resource->device->vk_device, 1, &vk_range))) < 0) -+ ERR("Failed to invalidate memory, vr %d.\n", vr); -+} -+ -+static void d3d12_resource_flush(struct d3d12_resource *resource, uint64_t offset, uint64_t size) -+{ -+ const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs; -+ VkMappedMemoryRange vk_range; -+ VkResult vr; -+ -+ if (d3d12_heap_get_memory_property_flags(resource->heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) -+ return; -+ -+ d3d12_resource_get_vk_range(resource, offset, size, &vk_range); -+ if ((vr = VK_CALL(vkFlushMappedMemoryRanges(resource->device->vk_device, 1, &vk_range))) < 0) -+ ERR("Failed to flush memory, vr %d.\n", vr); -+} -+ - static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT 
sub_resource, - const D3D12_RANGE *read_range, void **data) - { - struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); - unsigned int sub_resource_count; -- HRESULT hr; - - TRACE("iface %p, sub_resource %u, read_range %p, data %p.\n", - iface, sub_resource, read_range, data); -@@ -1259,15 +1452,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT - return E_NOTIMPL; - } - -- WARN("Ignoring read range %p.\n", read_range); -- -- if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, data))) -- WARN("Failed to map resource %p, hr %#x.\n", resource, hr); -- - if (data) -+ { -+ *data = d3d12_resource_get_map_ptr(resource); - TRACE("Returning pointer %p.\n", *data); -+ } - -- return hr; -+ if (!read_range) -+ d3d12_resource_invalidate(resource, 0, resource->desc.Width); -+ else if (read_range->End > read_range->Begin) -+ d3d12_resource_invalidate(resource, read_range->Begin, read_range->End - read_range->Begin); -+ -+ return S_OK; - } - - static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT sub_resource, -@@ -1286,9 +1482,10 @@ static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT s - return; - } - -- WARN("Ignoring written range %p.\n", written_range); -- -- d3d12_heap_unmap(resource->heap, resource); -+ if (!written_range) -+ d3d12_resource_flush(resource, 0, resource->desc.Width); -+ else if (written_range->End > written_range->Begin) -+ d3d12_resource_flush(resource, written_range->Begin, written_range->End - written_range->Begin); - } - - static D3D12_RESOURCE_DESC * STDMETHODCALLTYPE d3d12_resource_GetDesc(ID3D12Resource *iface, -@@ -1320,10 +1517,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc - VkImageSubresource vk_sub_resource; - const struct vkd3d_format *format; - VkSubresourceLayout vk_layout; -+ uint64_t dst_offset, dst_size; - struct d3d12_device *device; - uint8_t *dst_data; - D3D12_BOX box; -- 
HRESULT hr; - - TRACE("iface %p, src_data %p, src_row_pitch %u, src_slice_pitch %u, " - "dst_sub_resource %u, dst_box %s.\n", -@@ -1381,20 +1578,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc - TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", - vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch); - -- if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, (void **)&dst_data))) -- { -- WARN("Failed to map resource %p, hr %#x.\n", resource, hr); -- return hr; -- } -- -- dst_data += vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, -+ dst_data = d3d12_resource_get_map_ptr(resource); -+ dst_offset = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, - vk_layout.depthPitch, dst_box->left, dst_box->top, dst_box->front); -+ dst_size = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, -+ vk_layout.depthPitch, dst_box->right, dst_box->bottom - 1, dst_box->back - 1) - dst_offset; - - vkd3d_format_copy_data(format, src_data, src_row_pitch, src_slice_pitch, -- dst_data, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->right - dst_box->left, -+ dst_data + dst_offset, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->right - dst_box->left, - dst_box->bottom - dst_box->top, dst_box->back - dst_box->front); - -- d3d12_heap_unmap(resource->heap, resource); -+ d3d12_resource_flush(resource, dst_offset, dst_size); - - return S_OK; - } -@@ -1408,10 +1602,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour - VkImageSubresource vk_sub_resource; - const struct vkd3d_format *format; - VkSubresourceLayout vk_layout; -+ uint64_t src_offset, src_size; - struct d3d12_device *device; - uint8_t *src_data; - D3D12_BOX box; -- HRESULT hr; - - TRACE("iface %p, dst_data %p, dst_row_pitch %u, dst_slice_pitch %u, " - "src_sub_resource %u, src_box %s.\n", 
-@@ -1469,21 +1663,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour - TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", - vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch); - -- if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, (void **)&src_data))) -- { -- WARN("Failed to map resource %p, hr %#x.\n", resource, hr); -- return hr; -- } -- -- src_data += vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, -+ src_data = d3d12_resource_get_map_ptr(resource); -+ src_offset = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, - vk_layout.depthPitch, src_box->left, src_box->top, src_box->front); -+ src_size = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, -+ vk_layout.depthPitch, src_box->right, src_box->bottom - 1, src_box->back - 1) - src_offset; -+ -+ d3d12_resource_invalidate(resource, src_offset, src_size); - -- vkd3d_format_copy_data(format, src_data, vk_layout.rowPitch, vk_layout.depthPitch, -+ vkd3d_format_copy_data(format, src_data + src_offset, vk_layout.rowPitch, vk_layout.depthPitch, - dst_data, dst_row_pitch, dst_slice_pitch, src_box->right - src_box->left, - src_box->bottom - src_box->top, src_box->back - src_box->front); - -- d3d12_heap_unmap(resource->heap, resource); -- - return S_OK; - } - -@@ -1679,6 +1870,21 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d - return E_INVALIDARG; - } - -+ if (desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE) -+ { -+ if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D && !device->vk_info.sparse_residency_3d) -+ { -+ WARN("The device does not support tiled 3D images.\n"); -+ return E_INVALIDARG; -+ } -+ if (format->plane_count > 1) -+ { -+ WARN("Invalid format %#x. 
D3D12 does not support multiplanar formats for tiled resources.\n", -+ format->dxgi_format); -+ return E_INVALIDARG; -+ } -+ } -+ - if (!d3d12_resource_validate_texture_format(desc, format) - || !d3d12_resource_validate_texture_alignment(desc, format)) - return E_INVALIDARG; -@@ -1740,6 +1946,12 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 - - resource->desc = *desc; - -+ if (!heap_properties && !device->vk_info.sparse_binding) -+ { -+ WARN("The device does not support tiled images.\n"); -+ return E_INVALIDARG; -+ } -+ - if (heap_properties && !d3d12_resource_validate_heap_properties(resource, heap_properties, initial_state)) - return E_INVALIDARG; - -@@ -1805,6 +2017,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 - resource->heap = NULL; - resource->heap_offset = 0; - -+ memset(&resource->tiles, 0, sizeof(resource->tiles)); -+ - if (FAILED(hr = vkd3d_private_store_init(&resource->private_store))) - { - d3d12_resource_destroy(resource, device); -@@ -1941,6 +2155,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, - { - resource->heap = heap; - resource->heap_offset = heap_offset; -+ InterlockedIncrement(&heap->resource_count); - } - else - { -@@ -1989,6 +2204,12 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, - desc, initial_state, optimized_clear_value, &object))) - return hr; - -+ if (!d3d12_resource_init_tiles(object, device)) -+ { -+ d3d12_resource_Release(&object->ID3D12Resource_iface); -+ return E_OUTOFMEMORY; -+ } -+ - TRACE("Created reserved resource %p.\n", object); - - *resource = object; -@@ -1999,7 +2220,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, - HRESULT vkd3d_create_image_resource(ID3D12Device *device, - const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) - { -- struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device(device); -+ struct d3d12_device 
*d3d12_device = unsafe_impl_from_ID3D12Device1((ID3D12Device1 *)device); - struct d3d12_resource *object; - HRESULT hr; - -@@ -2061,24 +2282,101 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource) - return d3d12_resource_decref(impl_from_ID3D12Resource(resource)); - } - --/* CBVs, SRVs, UAVs */ --static struct vkd3d_view *vkd3d_view_create(enum vkd3d_view_type type) -+#define HEAD_INDEX_MASK (ARRAY_SIZE(cache->heads) - 1) -+ -+/* Objects are cached so that vkd3d_view_incref() can safely check the refcount of an -+ * object freed by another thread. This could be implemented as a single atomic linked -+ * list, but it requires handling the ABA problem, which brings issues with cross-platform -+ * support, compiler support, and non-universal x86-64 support for 128-bit CAS. */ -+static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) - { -- struct vkd3d_view *view; -+ union d3d12_desc_object u; -+ unsigned int i; -+ -+ STATIC_ASSERT(!(ARRAY_SIZE(cache->heads) & HEAD_INDEX_MASK)); - -- if ((view = vkd3d_malloc(sizeof(*view)))) -+ i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK; -+ for (;;) - { -- view->refcount = 1; -- view->type = type; -- view->serial_id = InterlockedIncrement64(&object_global_serial_id); -- view->vk_counter_view = VK_NULL_HANDLE; -+ if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) -+ { -+ if ((u.object = cache->heads[i].head)) -+ { -+ vkd3d_atomic_decrement(&cache->free_count); -+ cache->heads[i].head = u.header->next; -+ vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); -+ return u.object; -+ } -+ vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); -+ } -+ /* Keeping a free count avoids uncertainty over when this loop should terminate, -+ * which could result in excess allocations gradually increasing without limit. 
*/ -+ if (cache->free_count < ARRAY_SIZE(cache->heads)) -+ return vkd3d_malloc(cache->size); -+ -+ i = (i + 1) & HEAD_INDEX_MASK; - } -- return view; - } - --void vkd3d_view_incref(struct vkd3d_view *view) -+static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object) - { -- InterlockedIncrement(&view->refcount); -+ union d3d12_desc_object u = {object}; -+ unsigned int i; -+ void *head; -+ -+ /* Using the same index as above may result in a somewhat uneven distribution, -+ * but the main objective is to avoid costly spinlock contention. */ -+ i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK; -+ for (;;) -+ { -+ if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) -+ break; -+ i = (i + 1) & HEAD_INDEX_MASK; -+ } -+ -+ head = cache->heads[i].head; -+ u.header->next = head; -+ cache->heads[i].head = u.object; -+ vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); -+ vkd3d_atomic_increment(&cache->free_count); -+} -+ -+#undef HEAD_INDEX_MASK -+ -+static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device) -+{ -+ struct vkd3d_cbuffer_desc *desc; -+ -+ if (!(desc = vkd3d_desc_object_cache_get(&device->cbuffer_desc_cache))) -+ return NULL; -+ -+ desc->h.magic = VKD3D_DESCRIPTOR_MAGIC_CBV; -+ desc->h.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -+ desc->h.refcount = 1; -+ -+ return desc; -+} -+ -+static struct vkd3d_view *vkd3d_view_create(uint32_t magic, VkDescriptorType vk_descriptor_type, -+ enum vkd3d_view_type type, struct d3d12_device *device) -+{ -+ struct vkd3d_view *view; -+ -+ assert(magic); -+ -+ if (!(view = vkd3d_desc_object_cache_get(&device->view_desc_cache))) -+ { -+ ERR("Failed to allocate descriptor object.\n"); -+ return NULL; -+ } -+ -+ view->h.magic = magic; -+ view->h.vk_descriptor_type = vk_descriptor_type; -+ view->h.refcount = 1; -+ view->v.type = type; -+ view->v.vk_counter_view = VK_NULL_HANDLE; -+ -+ return view; - } - - static void 
vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *device) -@@ -2087,314 +2385,306 @@ static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *dev - - TRACE("Destroying view %p.\n", view); - -- switch (view->type) -+ switch (view->v.type) - { - case VKD3D_VIEW_TYPE_BUFFER: -- VK_CALL(vkDestroyBufferView(device->vk_device, view->u.vk_buffer_view, NULL)); -+ VK_CALL(vkDestroyBufferView(device->vk_device, view->v.u.vk_buffer_view, NULL)); - break; - case VKD3D_VIEW_TYPE_IMAGE: -- VK_CALL(vkDestroyImageView(device->vk_device, view->u.vk_image_view, NULL)); -+ VK_CALL(vkDestroyImageView(device->vk_device, view->v.u.vk_image_view, NULL)); - break; - case VKD3D_VIEW_TYPE_SAMPLER: -- VK_CALL(vkDestroySampler(device->vk_device, view->u.vk_sampler, NULL)); -+ VK_CALL(vkDestroySampler(device->vk_device, view->v.u.vk_sampler, NULL)); - break; - default: -- WARN("Unhandled view type %d.\n", view->type); -+ WARN("Unhandled view type %d.\n", view->v.type); - } - -- if (view->vk_counter_view) -- VK_CALL(vkDestroyBufferView(device->vk_device, view->vk_counter_view, NULL)); -+ if (view->v.vk_counter_view) -+ VK_CALL(vkDestroyBufferView(device->vk_device, view->v.vk_counter_view, NULL)); - -- vkd3d_free(view); -+ vkd3d_desc_object_cache_push(&device->view_desc_cache, view); - } - --void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) -+void vkd3d_view_decref(void *view, struct d3d12_device *device) - { -- if (!InterlockedDecrement(&view->refcount)) -- vkd3d_view_destroy(view, device); -+ union d3d12_desc_object u = {view}; -+ -+ if (vkd3d_atomic_decrement(&u.header->refcount)) -+ return; -+ -+ if (u.header->magic != VKD3D_DESCRIPTOR_MAGIC_CBV) -+ vkd3d_view_destroy(u.view, device); -+ else -+ vkd3d_desc_object_cache_push(&device->cbuffer_desc_cache, u.object); - } - --/* TODO: write null descriptors to all applicable sets (invalid behaviour workaround). 
*/ --static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descriptor_heap_vk_set *descriptor_set, -- struct d3d12_desc_copy_location *locations, unsigned int write_count) -+static inline void d3d12_desc_replace(struct d3d12_desc *dst, void *view, struct d3d12_device *device) - { -- unsigned int i, info_index = 0, write_index = 0; -+ if ((view = vkd3d_atomic_exchange_pointer(&dst->s.u.object, view))) -+ vkd3d_view_decref(view, device); -+} - -- switch (locations[0].src.s.vk_descriptor_type) -- { -- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: -- for (; write_index < write_count; ++write_index) -- { -- descriptor_set->vk_descriptor_writes[write_index].pBufferInfo = &descriptor_set->vk_buffer_infos[info_index]; -- for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) -- descriptor_set->vk_buffer_infos[info_index] = locations[info_index].src.s.u.vk_cbv_info; -- } -- break; -- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: -- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: -- for (; write_index < write_count; ++write_index) -- { -- descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; -- for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) -- descriptor_set->vk_image_infos[info_index].imageView = locations[info_index].src.s.u.view_info.view->u.vk_image_view; -- } -- break; -- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: -- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: -- for (; write_index < write_count; ++write_index) -- { -- descriptor_set->vk_descriptor_writes[write_index].pTexelBufferView = &descriptor_set->vk_buffer_views[info_index]; -- for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) -- descriptor_set->vk_buffer_views[info_index] = locations[info_index].src.s.u.view_info.view->u.vk_buffer_view; -- } -- break; -- case VK_DESCRIPTOR_TYPE_SAMPLER: -- for (; 
write_index < write_count; ++write_index) -- { -- descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; -- for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) -- descriptor_set->vk_image_infos[info_index].sampler = locations[info_index].src.s.u.view_info.view->u.vk_sampler; -- } -- break; -- default: -- ERR("Unhandled descriptor type %#x.\n", locations[0].src.s.vk_descriptor_type); -- break; -- } -+#define VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE 24 -+ -+struct descriptor_writes -+{ -+ VkDescriptorBufferInfo null_vk_cbv_info; -+ VkBufferView null_vk_buffer_view; -+ VkDescriptorImageInfo vk_image_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; -+ VkWriteDescriptorSet vk_descriptor_writes[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; -+ void *held_refs[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; -+ unsigned int count; -+ unsigned int held_ref_count; -+}; -+ -+static void descriptor_writes_free_object_refs(struct descriptor_writes *writes, struct d3d12_device *device) -+{ -+ unsigned int i; -+ for (i = 0; i < writes->held_ref_count; ++i) -+ vkd3d_view_decref(writes->held_refs[i], device); -+ writes->held_ref_count = 0; - } - - static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_heap *descriptor_heap, -- uint32_t dst_array_element, const struct d3d12_device *device) -+ uint32_t dst_array_element, struct descriptor_writes *writes, struct d3d12_device *device) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - struct d3d12_descriptor_heap_vk_set *descriptor_set; -- VkBufferView vk_buffer_view = VK_NULL_HANDLE; -- enum vkd3d_vk_descriptor_set_index i; -- VkDescriptorBufferInfo vk_cbv_info; -- -- vk_cbv_info.buffer = VK_NULL_HANDLE; -- vk_cbv_info.offset = 0; -- vk_cbv_info.range = VK_WHOLE_SIZE; -+ enum vkd3d_vk_descriptor_set_index set; -+ unsigned int i = writes->count; - - /* Binding a shader with the wrong null descriptor type works in Windows. 
- * To support that here we must write one to all applicable Vulkan sets. */ -- for (i = VKD3D_SET_INDEX_UNIFORM_BUFFER; i <= VKD3D_SET_INDEX_STORAGE_IMAGE; ++i) -- { -- descriptor_set = &descriptor_heap->vk_descriptor_sets[i]; -- descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst_array_element; -- descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; -- switch (i) -+ for (set = VKD3D_SET_INDEX_UNIFORM_BUFFER; set <= VKD3D_SET_INDEX_STORAGE_IMAGE; ++set) -+ { -+ descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; -+ writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; -+ writes->vk_descriptor_writes[i].pNext = NULL; -+ writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; -+ writes->vk_descriptor_writes[i].dstBinding = 0; -+ writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; -+ writes->vk_descriptor_writes[i].descriptorCount = 1; -+ writes->vk_descriptor_writes[i].descriptorType = descriptor_set->vk_type; -+ switch (set) - { - case VKD3D_SET_INDEX_UNIFORM_BUFFER: -- descriptor_set->vk_descriptor_writes[0].pBufferInfo = &vk_cbv_info; -+ writes->vk_descriptor_writes[i].pImageInfo = NULL; -+ writes->vk_descriptor_writes[i].pBufferInfo = &writes->null_vk_cbv_info; -+ writes->vk_descriptor_writes[i].pTexelBufferView = NULL; - break; - case VKD3D_SET_INDEX_SAMPLED_IMAGE: - case VKD3D_SET_INDEX_STORAGE_IMAGE: -- descriptor_set->vk_image_infos[0].imageView = VK_NULL_HANDLE; -+ writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; -+ writes->vk_descriptor_writes[i].pBufferInfo = NULL; -+ writes->vk_descriptor_writes[i].pTexelBufferView = NULL; -+ writes->vk_image_infos[i].sampler = VK_NULL_HANDLE; -+ writes->vk_image_infos[i].imageView = VK_NULL_HANDLE; -+ writes->vk_image_infos[i].imageLayout = (set == VKD3D_SET_INDEX_STORAGE_IMAGE) -+ ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - break; - case VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER: - case VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER: -- descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &vk_buffer_view; -+ writes->vk_descriptor_writes[i].pImageInfo = NULL; -+ writes->vk_descriptor_writes[i].pBufferInfo = NULL; -+ writes->vk_descriptor_writes[i].pTexelBufferView = &writes->null_vk_buffer_view; - break; - default: - assert(false); - break; - } -- VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); -+ if (++i < ARRAY_SIZE(writes->vk_descriptor_writes) - 1) -+ continue; -+ VK_CALL(vkUpdateDescriptorSets(device->vk_device, i, writes->vk_descriptor_writes, 0, NULL)); -+ descriptor_writes_free_object_refs(writes, device); -+ i = 0; - } -+ -+ writes->count = i; - } - --/* dst and src contain the same data unless another thread overwrites dst. The array index is -- * calculated from dst, and src is thread safe. 
*/ --static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, -- struct d3d12_device *device) -+static void d3d12_desc_write_vk_heap(struct d3d12_descriptor_heap *descriptor_heap, unsigned int dst_array_element, -+ struct descriptor_writes *writes, void *object, struct d3d12_device *device) - { - struct d3d12_descriptor_heap_vk_set *descriptor_set; -- struct d3d12_descriptor_heap *descriptor_heap; - const struct vkd3d_vk_device_procs *vk_procs; -+ union d3d12_desc_object u = {object}; -+ unsigned int i = writes->count; -+ VkDescriptorType type; - bool is_null = false; - -- descriptor_heap = d3d12_desc_get_descriptor_heap(dst); -- descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( -- src->s.vk_descriptor_type)]; -+ type = u.header->vk_descriptor_type; -+ descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(type)]; - vk_procs = &device->vk_procs; - -- vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); -- -- descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; -- descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; -- switch (src->s.vk_descriptor_type) -+ writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; -+ writes->vk_descriptor_writes[i].pNext = NULL; -+ writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; -+ writes->vk_descriptor_writes[i].dstBinding = 0; -+ writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; -+ writes->vk_descriptor_writes[i].descriptorCount = 1; -+ writes->vk_descriptor_writes[i].descriptorType = type; -+ switch (type) - { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: -- descriptor_set->vk_descriptor_writes[0].pBufferInfo = &src->s.u.vk_cbv_info; -- is_null = !src->s.u.vk_cbv_info.buffer; -+ writes->vk_descriptor_writes[i].pImageInfo = NULL; -+ writes->vk_descriptor_writes[i].pBufferInfo = &u.cb_desc->vk_cbv_info; -+ 
writes->vk_descriptor_writes[i].pTexelBufferView = NULL; -+ is_null = !u.cb_desc->vk_cbv_info.buffer; - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: -- is_null = !(descriptor_set->vk_image_infos[0].imageView = src->s.u.view_info.view->u.vk_image_view); -+ writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; -+ writes->vk_descriptor_writes[i].pBufferInfo = NULL; -+ writes->vk_descriptor_writes[i].pTexelBufferView = NULL; -+ writes->vk_image_infos[i].sampler = VK_NULL_HANDLE; -+ is_null = !(writes->vk_image_infos[i].imageView = u.view->v.u.vk_image_view); -+ writes->vk_image_infos[i].imageLayout = (type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) -+ ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: -- descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->u.vk_buffer_view; -- is_null = !src->s.u.view_info.view->u.vk_buffer_view; -+ writes->vk_descriptor_writes[i].pImageInfo = NULL; -+ writes->vk_descriptor_writes[i].pBufferInfo = NULL; -+ writes->vk_descriptor_writes[i].pTexelBufferView = &u.view->v.u.vk_buffer_view; -+ is_null = !u.view->v.u.vk_buffer_view; - break; - case VK_DESCRIPTOR_TYPE_SAMPLER: -- descriptor_set->vk_image_infos[0].sampler = src->s.u.view_info.view->u.vk_sampler; -+ writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; -+ writes->vk_descriptor_writes[i].pBufferInfo = NULL; -+ writes->vk_descriptor_writes[i].pTexelBufferView = NULL; -+ writes->vk_image_infos[i].sampler = u.view->v.u.vk_sampler; -+ writes->vk_image_infos[i].imageView = VK_NULL_HANDLE; -+ writes->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; - break; - default: -- ERR("Unhandled descriptor type %#x.\n", src->s.vk_descriptor_type); -+ ERR("Unhandled descriptor type %#x.\n", type); - break; - } - if (is_null && device->vk_info.EXT_robustness2) 
-- { -- d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, -- descriptor_set->vk_descriptor_writes[0].dstArrayElement, device); -- vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); -- return; -- } -- -- VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); -+ return d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, dst_array_element, writes, device); - -- if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->s.u.view_info.view->vk_counter_view) -+ ++i; -+ if (u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && u.view->v.vk_counter_view) - { - descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; -- descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; -- descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; -- descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->vk_counter_view; -- VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); -+ writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; -+ writes->vk_descriptor_writes[i].pNext = NULL; -+ writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; -+ writes->vk_descriptor_writes[i].dstBinding = 0; -+ writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; -+ writes->vk_descriptor_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -+ writes->vk_descriptor_writes[i].descriptorCount = 1; -+ writes->vk_descriptor_writes[i].pImageInfo = NULL; -+ writes->vk_descriptor_writes[i].pBufferInfo = NULL; -+ writes->vk_descriptor_writes[i++].pTexelBufferView = &u.view->v.vk_counter_view; - } - -- vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); --} -- --static void d3d12_desc_write_atomic_d3d12_only(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) --{ -- struct vkd3d_view *defunct_view; -- struct vkd3d_mutex *mutex; -- -- mutex = 
d3d12_device_get_descriptor_mutex(device, dst); -- vkd3d_mutex_lock(mutex); -- -- if (!(dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->s.u.view_info.view->refcount)) -+ if (i >= ARRAY_SIZE(writes->vk_descriptor_writes) - 1) - { -- d3d12_desc_copy_raw(dst, src); -- vkd3d_mutex_unlock(mutex); -- return; -+ VK_CALL(vkUpdateDescriptorSets(device->vk_device, i, writes->vk_descriptor_writes, 0, NULL)); -+ descriptor_writes_free_object_refs(writes, device); -+ i = 0; - } - -- defunct_view = dst->s.u.view_info.view; -- d3d12_desc_copy_raw(dst, src); -- vkd3d_mutex_unlock(mutex); -- -- /* Destroy the view after unlocking to reduce wait time. */ -- vkd3d_view_destroy(defunct_view, device); -+ writes->count = i; - } - --void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, -- struct d3d12_device *device) -+void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) - { -- struct vkd3d_view *defunct_view = NULL; -- struct vkd3d_mutex *mutex; -+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -+ struct d3d12_desc *descriptors, *src; -+ struct descriptor_writes writes; -+ union d3d12_desc_object u; -+ unsigned int i, next; - -- mutex = d3d12_device_get_descriptor_mutex(device, dst); -- vkd3d_mutex_lock(mutex); -+ if ((i = vkd3d_atomic_exchange(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) -+ return; - -- /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. 
*/ -- if ((dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) -- && !InterlockedDecrement(&dst->s.u.view_info.view->refcount)) -- defunct_view = dst->s.u.view_info.view; -+ writes.null_vk_cbv_info.buffer = VK_NULL_HANDLE; -+ writes.null_vk_cbv_info.offset = 0; -+ writes.null_vk_cbv_info.range = VK_WHOLE_SIZE; -+ writes.null_vk_buffer_view = VK_NULL_HANDLE; -+ writes.count = 0; -+ writes.held_ref_count = 0; - -- d3d12_desc_copy_raw(dst, src); -+ descriptors = (struct d3d12_desc *)descriptor_heap->descriptors; - -- vkd3d_mutex_unlock(mutex); -+ for (; i != UINT_MAX; i = next) -+ { -+ src = &descriptors[i]; -+ next = (int)src->next >> 1; - -- /* Destroy the view after unlocking to reduce wait time. */ -- if (defunct_view) -- vkd3d_view_destroy(defunct_view, device); -+ u.object = d3d12_desc_get_object_ref(src, device); - -- if (device->use_vk_heaps && dst->s.magic) -- d3d12_desc_write_vk_heap(dst, src, device); --} -+ if (!u.object) -+ { -+ vkd3d_atomic_exchange(&src->next, 0); -+ continue; -+ } - --static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) --{ -- static const struct d3d12_desc null_desc = {0}; -+ writes.held_refs[writes.held_ref_count++] = u.object; -+ d3d12_desc_write_vk_heap(descriptor_heap, i, &writes, u.object, device); -+ -+ vkd3d_atomic_exchange(&src->next, 0); -+ } - -- d3d12_desc_write_atomic(descriptor, &null_desc, device); -+ /* Avoid thunk calls wherever possible. 
*/ -+ if (writes.count) -+ VK_CALL(vkUpdateDescriptorSets(device->vk_device, writes.count, writes.vk_descriptor_writes, 0, NULL)); -+ descriptor_writes_free_object_refs(&writes, device); - } - --void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, -- struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, -- struct d3d12_device *device) -+static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst, struct d3d12_descriptor_heap *descriptor_heap) - { -- struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; -- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -- unsigned int i, write_count; -+ unsigned int i, head; - -- vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); -+ i = dst->index; -+ head = descriptor_heap->dirty_list_head; - -- for (i = 0, write_count = 0; i < info->count; ++i) -+ /* Only one thread can swap the value away from zero. */ -+ if (!vkd3d_atomic_compare_exchange(&dst->next, 0, (head << 1) | 1)) -+ return; -+ /* Now it is safe to modify 'next' to another nonzero value if necessary. */ -+ while (!vkd3d_atomic_compare_exchange(&descriptor_heap->dirty_list_head, head, i)) - { -- d3d12_desc_write_atomic_d3d12_only(locations[i].dst, &locations[i].src, device); -+ head = descriptor_heap->dirty_list_head; -+ vkd3d_atomic_exchange(&dst->next, (head << 1) | 1); -+ } -+} - -- if (i && locations[i].dst == locations[i - 1].dst + 1) -- { -- ++descriptor_set->vk_descriptor_writes[write_count - 1].descriptorCount; -- continue; -- } -- /* Accessing dst->index will be slow if a cache miss occurs, so calculate instead. 
*/ -- descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst -- - (const struct d3d12_desc *)descriptor_heap->descriptors; -- descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; -- } -- d3d12_descriptor_heap_write_vk_descriptor_range(descriptor_set, locations, write_count); -- /* We could pass a VkCopyDescriptorSet array instead, but that would require also storing a src array index -- * for each location, which means querying the src descriptor heap. Contiguous copies require contiguous src -- * descriptors as well as dst, which is less likely to occur. And client race conditions may break it. */ -- VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); -- -- if (!info->uav_counter) -- goto done; -+static inline void descriptor_heap_write_atomic(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_desc *dst, -+ const struct d3d12_desc *src, struct d3d12_device *device) -+{ -+ void *object = src->s.u.object; - -- descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; -+ d3d12_desc_replace(dst, object, device); -+ if (descriptor_heap->use_vk_heaps && object && !dst->next) -+ d3d12_desc_mark_as_modified(dst, descriptor_heap); -+} - -- for (i = 0, write_count = 0; i < info->count; ++i) -- { -- if (!locations[i].src.s.u.view_info.view->vk_counter_view) -- continue; -- descriptor_set->vk_buffer_views[write_count] = locations[i].src.s.u.view_info.view->vk_counter_view; -- descriptor_set->vk_descriptor_writes[write_count].pTexelBufferView = &descriptor_set->vk_buffer_views[write_count]; -- /* Accessing dst->index will be slow if a cache miss occurs, so calculate instead. 
*/ -- descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst -- - (const struct d3d12_desc *)descriptor_heap->descriptors; -- descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; -- } -- VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); -+void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, -+ struct d3d12_device *device) -+{ -+ descriptor_heap_write_atomic(d3d12_desc_get_descriptor_heap(dst), dst, src, device); -+} - --done: -- vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); -+static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) -+{ -+ d3d12_desc_replace(descriptor, NULL, device); - } - --void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, -+/* This is a major performance bottleneck for some games, so do not load the device -+ * pointer from dst_heap. In some cases device will not be used. */ -+void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, - struct d3d12_device *device) - { - struct d3d12_desc tmp; -- struct vkd3d_mutex *mutex; - - assert(dst != src); - -- /* Shadow of the Tomb Raider and possibly other titles sometimes destroy -- * and rewrite a descriptor in another thread while it is being copied. 
*/ -- mutex = d3d12_device_get_descriptor_mutex(device, src); -- vkd3d_mutex_lock(mutex); -- -- if (src->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) -- vkd3d_view_incref(src->s.u.view_info.view); -- -- d3d12_desc_copy_raw(&tmp, src); -- -- vkd3d_mutex_unlock(mutex); -- -- d3d12_desc_write_atomic(dst, &tmp, device); -+ tmp.s.u.object = d3d12_desc_get_object_ref(src, device); -+ descriptor_heap_write_atomic(dst_heap, dst, &tmp, device); - } - - static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12_device *device, -@@ -2455,8 +2745,9 @@ static bool vkd3d_create_vk_buffer_view(struct d3d12_device *device, - return vr == VK_SUCCESS; - } - --bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, -- VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view) -+bool vkd3d_create_buffer_view(struct d3d12_device *device, uint32_t magic, VkBuffer vk_buffer, -+ const struct vkd3d_format *format, VkDeviceSize offset, VkDeviceSize size, -+ struct vkd3d_view **view) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkBufferView vk_view = VK_NULL_HANDLE; -@@ -2465,16 +2756,18 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c - if (vk_buffer && !vkd3d_create_vk_buffer_view(device, vk_buffer, format, offset, size, &vk_view)) - return false; - -- if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_BUFFER))) -+ if (!(object = vkd3d_view_create(magic, magic == VKD3D_DESCRIPTOR_MAGIC_UAV -+ ? 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, -+ VKD3D_VIEW_TYPE_BUFFER, device))) - { - VK_CALL(vkDestroyBufferView(device->vk_device, vk_view, NULL)); - return false; - } - -- object->u.vk_buffer_view = vk_view; -- object->format = format; -- object->info.buffer.offset = offset; -- object->info.buffer.size = size; -+ object->v.u.vk_buffer_view = vk_view; -+ object->v.format = format; -+ object->v.info.buffer.offset = offset; -+ object->v.info.buffer.size = size; - *view = object; - return true; - } -@@ -2482,7 +2775,7 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c - #define VKD3D_VIEW_RAW_BUFFER 0x1 - - static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, -- struct d3d12_resource *resource, DXGI_FORMAT view_format, -+ uint32_t magic, struct d3d12_resource *resource, DXGI_FORMAT view_format, - unsigned int offset, unsigned int size, unsigned int structure_stride, - unsigned int flags, struct vkd3d_view **view) - { -@@ -2513,7 +2806,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, - - assert(d3d12_resource_is_buffer(resource)); - -- return vkd3d_create_buffer_view(device, resource->u.vk_buffer, -+ return vkd3d_create_buffer_view(device, magic, resource->u.vk_buffer, - format, offset * element_size, size * element_size, view); - } - -@@ -2741,7 +3034,7 @@ static void vkd3d_texture_view_desc_normalise(struct vkd3d_texture_view_desc *de - desc->layer_count = max_layer_count; - } - --bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, -+bool vkd3d_create_texture_view(struct d3d12_device *device, uint32_t magic, VkImage vk_image, - const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -@@ -2774,18 +3067,19 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, - } - } - -- if (!(object = 
vkd3d_view_create(VKD3D_VIEW_TYPE_IMAGE))) -+ if (!(object = vkd3d_view_create(magic, magic == VKD3D_DESCRIPTOR_MAGIC_UAV ? VK_DESCRIPTOR_TYPE_STORAGE_IMAGE -+ : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VKD3D_VIEW_TYPE_IMAGE, device))) - { - VK_CALL(vkDestroyImageView(device->vk_device, vk_view, NULL)); - return false; - } - -- object->u.vk_image_view = vk_view; -- object->format = format; -- object->info.texture.vk_view_type = desc->view_type; -- object->info.texture.miplevel_idx = desc->miplevel_idx; -- object->info.texture.layer_idx = desc->layer_idx; -- object->info.texture.layer_count = desc->layer_count; -+ object->v.u.vk_image_view = vk_view; -+ object->v.format = format; -+ object->v.info.texture.vk_view_type = desc->view_type; -+ object->v.info.texture.miplevel_idx = desc->miplevel_idx; -+ object->v.info.texture.layer_idx = desc->layer_idx; -+ object->v.info.texture.layer_count = desc->layer_count; - *view = object; - return true; - } -@@ -2794,6 +3088,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, - struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc) - { - struct VkDescriptorBufferInfo *buffer_info; -+ struct vkd3d_cbuffer_desc *cb_desc; - struct d3d12_resource *resource; - - if (!desc) -@@ -2802,13 +3097,19 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, - return; - } - -+ if (!(cb_desc = vkd3d_cbuffer_desc_create(device))) -+ { -+ ERR("Failed to allocate descriptor object.\n"); -+ return; -+ } -+ - if (desc->SizeInBytes & (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1)) - { - WARN("Size is not %u bytes aligned.\n", D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); - return; - } - -- buffer_info = &descriptor->s.u.vk_cbv_info; -+ buffer_info = &cb_desc->vk_cbv_info; - if (desc->BufferLocation) - { - resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, desc->BufferLocation); -@@ -2824,8 +3125,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, - buffer_info->range = 
VK_WHOLE_SIZE; - } - -- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_CBV; -- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -+ descriptor->s.u.cb_desc = cb_desc; - } - - static unsigned int vkd3d_view_flags_from_d3d12_buffer_srv_flags(D3D12_BUFFER_SRV_FLAGS flags) -@@ -2842,7 +3142,6 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, - { - struct vkd3d_null_resources *null_resources = &device->null_resources; - struct vkd3d_texture_view_desc vkd3d_desc; -- struct vkd3d_view *view; - VkImage vk_image; - - if (!desc) -@@ -2857,15 +3156,9 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, - if (!device->vk_info.EXT_robustness2) - WARN("Creating NULL buffer SRV %#x.\n", desc->Format); - -- if (vkd3d_create_buffer_view(device, null_resources->vk_buffer, -+ vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, null_resources->vk_buffer, - vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), -- 0, VKD3D_NULL_BUFFER_SIZE, &view)) -- { -- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; -- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -- descriptor->s.u.view_info.view = view; -- descriptor->s.u.view_info.written_serial_id = view->serial_id; -- } -+ 0, VKD3D_NULL_BUFFER_SIZE, &descriptor->s.u.view); - return; - - case D3D12_SRV_DIMENSION_TEXTURE2D: -@@ -2904,20 +3197,13 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, - vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_ZERO; - vkd3d_desc.allowed_swizzle = true; - -- if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) -- return; -- -- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; -- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; -- descriptor->s.u.view_info.view = view; -- descriptor->s.u.view_info.written_serial_id = view->serial_id; -+ vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, vk_image, &vkd3d_desc, &descriptor->s.u.view); - } - - static void 
vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, - struct d3d12_device *device, struct d3d12_resource *resource, - const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) - { -- struct vkd3d_view *view; - unsigned int flags; - - if (!desc) -@@ -2933,15 +3219,9 @@ static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, - } - - flags = vkd3d_view_flags_from_d3d12_buffer_srv_flags(desc->u.Buffer.Flags); -- if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, -+ vkd3d_create_buffer_view_for_resource(device, VKD3D_DESCRIPTOR_MAGIC_SRV, resource, desc->Format, - desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, -- desc->u.Buffer.StructureByteStride, flags, &view)) -- return; -- -- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; -- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -- descriptor->s.u.view_info.view = view; -- descriptor->s.u.view_info.written_serial_id = view->serial_id; -+ desc->u.Buffer.StructureByteStride, flags, &descriptor->s.u.view); - } - - static VkImageAspectFlags vk_image_aspect_flags_from_d3d12_plane_slice(const struct vkd3d_format *format, -@@ -2970,7 +3250,6 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, - const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) - { - struct vkd3d_texture_view_desc vkd3d_desc; -- struct vkd3d_view *view; - - if (!resource) - { -@@ -3002,6 +3281,11 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, - - switch (desc->ViewDimension) - { -+ case D3D12_SRV_DIMENSION_TEXTURE1D: -+ vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_1D; -+ vkd3d_desc.miplevel_idx = desc->u.Texture1D.MostDetailedMip; -+ vkd3d_desc.miplevel_count = desc->u.Texture1D.MipLevels; -+ break; - case D3D12_SRV_DIMENSION_TEXTURE2D: - vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; - vkd3d_desc.miplevel_idx = desc->u.Texture2D.MostDetailedMip; -@@ -3066,13 +3350,8 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, - } - } - -- if (!vkd3d_create_texture_view(device, 
resource->u.vk_image, &vkd3d_desc, &view)) -- return; -- -- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; -- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; -- descriptor->s.u.view_info.view = view; -- descriptor->s.u.view_info.written_serial_id = view->serial_id; -+ vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, resource->u.vk_image, &vkd3d_desc, -+ &descriptor->s.u.view); - } - - static unsigned int vkd3d_view_flags_from_d3d12_buffer_uav_flags(D3D12_BUFFER_UAV_FLAGS flags) -@@ -3089,7 +3368,6 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, - { - struct vkd3d_null_resources *null_resources = &device->null_resources; - struct vkd3d_texture_view_desc vkd3d_desc; -- struct vkd3d_view *view; - VkImage vk_image; - - if (!desc) -@@ -3104,15 +3382,9 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, - if (!device->vk_info.EXT_robustness2) - WARN("Creating NULL buffer UAV %#x.\n", desc->Format); - -- if (vkd3d_create_buffer_view(device, null_resources->vk_storage_buffer, -+ vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, null_resources->vk_storage_buffer, - vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), -- 0, VKD3D_NULL_BUFFER_SIZE, &view)) -- { -- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; -- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -- descriptor->s.u.view_info.view = view; -- descriptor->s.u.view_info.written_serial_id = view->serial_id; -- } -+ 0, VKD3D_NULL_BUFFER_SIZE, &descriptor->s.u.view); - return; - - case D3D12_UAV_DIMENSION_TEXTURE2D: -@@ -3150,13 +3422,7 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, - vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_A; - vkd3d_desc.allowed_swizzle = false; - -- if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) -- return; -- -- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; -- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; -- 
descriptor->s.u.view_info.view = view; -- descriptor->s.u.view_info.written_serial_id = view->serial_id; -+ vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, vk_image, &vkd3d_desc, &descriptor->s.u.view); - } - - static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, -@@ -3179,16 +3445,11 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ - } - - flags = vkd3d_view_flags_from_d3d12_buffer_uav_flags(desc->u.Buffer.Flags); -- if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, -+ if (!vkd3d_create_buffer_view_for_resource(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource, desc->Format, - desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, - desc->u.Buffer.StructureByteStride, flags, &view)) - return; - -- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; -- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -- descriptor->s.u.view_info.view = view; -- descriptor->s.u.view_info.written_serial_id = view->serial_id; -- - if (counter_resource) - { - const struct vkd3d_format *format; -@@ -3198,13 +3459,16 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ - - format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); - if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format, -- desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->vk_counter_view)) -+ desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->v.vk_counter_view)) - { - WARN("Failed to create counter buffer view.\n"); -- view->vk_counter_view = VK_NULL_HANDLE; -- d3d12_desc_destroy(descriptor, device); -+ view->v.vk_counter_view = VK_NULL_HANDLE; -+ vkd3d_view_decref(view, device); -+ return; - } - } -+ -+ descriptor->s.u.view = view; - } - - static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, -@@ -3212,7 +3476,6 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, - 
const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) - { - struct vkd3d_texture_view_desc vkd3d_desc; -- struct vkd3d_view *view; - - if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? desc->Format : 0)) - return; -@@ -3227,6 +3490,9 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, - { - switch (desc->ViewDimension) - { -+ case D3D12_UAV_DIMENSION_TEXTURE1D: -+ vkd3d_desc.miplevel_idx = desc->u.Texture1D.MipSlice; -+ break; - case D3D12_UAV_DIMENSION_TEXTURE2D: - vkd3d_desc.miplevel_idx = desc->u.Texture2D.MipSlice; - if (desc->u.Texture2D.PlaneSlice) -@@ -3257,13 +3523,8 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, - } - } - -- if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) -- return; -- -- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; -- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; -- descriptor->s.u.view_info.view = view; -- descriptor->s.u.view_info.written_serial_id = view->serial_id; -+ vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_image, &vkd3d_desc, -+ &descriptor->s.u.view); - } - - void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, -@@ -3291,12 +3552,26 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d - } - - bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, -- D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view) -+ D3D12_GPU_VIRTUAL_ADDRESS gpu_address, D3D12_ROOT_PARAMETER_TYPE parameter_type, VkBufferView *vk_buffer_view) - { - const struct vkd3d_format *format; - struct d3d12_resource *resource; - - format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); -+ -+ if (!gpu_address) -+ { -+ if (device->vk_info.EXT_robustness2) -+ { -+ *vk_buffer_view = VK_NULL_HANDLE; -+ return true; -+ } -+ WARN("Creating null buffer view.\n"); -+ return vkd3d_create_vk_buffer_view(device, parameter_type == 
D3D12_ROOT_PARAMETER_TYPE_UAV -+ ? device->null_resources.vk_storage_buffer : device->null_resources.vk_buffer, -+ format, 0, VK_WHOLE_SIZE, vk_buffer_view); -+ } -+ - resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address); - assert(d3d12_resource_is_buffer(resource)); - return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format, -@@ -3412,21 +3687,21 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, - FIXME("Ignoring border color {%.8e, %.8e, %.8e, %.8e}.\n", - desc->BorderColor[0], desc->BorderColor[1], desc->BorderColor[2], desc->BorderColor[3]); - -- if (!(view = vkd3d_view_create(VKD3D_VIEW_TYPE_SAMPLER))) -+ if (!(view = vkd3d_view_create(VKD3D_DESCRIPTOR_MAGIC_SAMPLER, VK_DESCRIPTOR_TYPE_SAMPLER, -+ VKD3D_VIEW_TYPE_SAMPLER, device))) - return; -+ view->v.u.vk_sampler = VK_NULL_HANDLE; -+ view->v.format = NULL; - - if (d3d12_create_sampler(device, desc->Filter, desc->AddressU, - desc->AddressV, desc->AddressW, desc->MipLODBias, desc->MaxAnisotropy, -- desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->u.vk_sampler) < 0) -+ desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->v.u.vk_sampler) < 0) - { -- vkd3d_free(view); -+ vkd3d_view_decref(view, device); - return; - } - -- sampler->s.magic = VKD3D_DESCRIPTOR_MAGIC_SAMPLER; -- sampler->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLER; -- sampler->s.u.view_info.view = view; -- sampler->s.u.view_info.written_serial_id = view->serial_id; -+ sampler->s.u.view = view; - } - - HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, -@@ -3448,7 +3723,7 @@ HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, - /* RTVs */ - static void d3d12_rtv_desc_destroy(struct d3d12_rtv_desc *rtv, struct d3d12_device *device) - { -- if (rtv->magic != VKD3D_DESCRIPTOR_MAGIC_RTV) -+ if (!rtv->view) - return; - - vkd3d_view_decref(rtv->view, device); -@@ -3527,10 +3802,9 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct 
d3d12_dev - - assert(d3d12_resource_is_texture(resource)); - -- if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) -+ if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_RTV, resource->u.vk_image, &vkd3d_desc, &view)) - return; - -- rtv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_RTV; - rtv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); - rtv_desc->format = vkd3d_desc.format; - rtv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); -@@ -3543,7 +3817,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev - /* DSVs */ - static void d3d12_dsv_desc_destroy(struct d3d12_dsv_desc *dsv, struct d3d12_device *device) - { -- if (dsv->magic != VKD3D_DESCRIPTOR_MAGIC_DSV) -+ if (!dsv->view) - return; - - vkd3d_view_decref(dsv->view, device); -@@ -3612,10 +3886,9 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev - - assert(d3d12_resource_is_texture(resource)); - -- if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) -+ if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_DSV, resource->u.vk_image, &vkd3d_desc, &view)) - return; - -- dsv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_DSV; - dsv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); - dsv_desc->format = vkd3d_desc.format; - dsv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); -@@ -3810,7 +4083,15 @@ static D3D12_GPU_DESCRIPTOR_HANDLE * STDMETHODCALLTYPE d3d12_descriptor_heap_Get - - TRACE("iface %p, descriptor %p.\n", iface, descriptor); - -- descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; -+ if (heap->desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) -+ { -+ descriptor->ptr = (uint64_t)(intptr_t)heap->descriptors; -+ } -+ else -+ { -+ WARN("Heap %p is not shader-visible.\n", iface); -+ descriptor->ptr = 0; -+ } - - return descriptor; - 
} -@@ -3883,7 +4164,6 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; - VkDescriptorSetAllocateInfo set_desc; -- unsigned int i; - VkResult vr; - - set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; -@@ -3897,8 +4177,7 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript - set_size.pDescriptorCounts = &variable_binding_size; - if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0) - { -- for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i) -- descriptor_set->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; -+ descriptor_set->vk_type = device->vk_descriptor_heap_layouts[set].type; - return S_OK; - } - -@@ -3914,9 +4193,8 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri - - descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; - memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets)); -- vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); - -- if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV -+ if (!descriptor_heap->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV - && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) - return S_OK; - -@@ -3925,53 +4203,6 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri - - for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set) - { -- struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; -- unsigned int i; -- -- for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i) -- { -- descriptor_set->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; -- descriptor_set->vk_descriptor_writes[i].pNext = 
NULL; -- descriptor_set->vk_descriptor_writes[i].dstBinding = 0; -- descriptor_set->vk_descriptor_writes[i].descriptorType = device->vk_descriptor_heap_layouts[set].type; -- descriptor_set->vk_descriptor_writes[i].pImageInfo = NULL; -- descriptor_set->vk_descriptor_writes[i].pBufferInfo = NULL; -- descriptor_set->vk_descriptor_writes[i].pTexelBufferView = NULL; -- } -- switch (device->vk_descriptor_heap_layouts[set].type) -- { -- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: -- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: -- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: -- break; -- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: -- descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; -- for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) -- { -- descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE; -- descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; -- } -- break; -- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: -- descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; -- for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) -- { -- descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE; -- descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; -- } -- break; -- case VK_DESCRIPTOR_TYPE_SAMPLER: -- descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; -- for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) -- { -- descriptor_set->vk_image_infos[i].imageView = VK_NULL_HANDLE; -- descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; -- } -- break; -- default: -- ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type); -- return E_FAIL; -- } - if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type - && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, set))) - return hr; -@@ 
-3994,7 +4225,9 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript - if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store))) - return hr; - -+ descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); - d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); -+ vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); - - d3d12_device_add_ref(descriptor_heap->device = device); - -@@ -4047,7 +4280,9 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, - { - memset(&dst[i].s, 0, sizeof(dst[i].s)); - dst[i].index = i; -+ dst[i].next = 0; - } -+ object->dirty_list_head = UINT_MAX; - } - else - { -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index c964ea8fe3a..0b92cffcde3 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -20,6 +20,7 @@ - - #include "vkd3d_private.h" - #include "vkd3d_shaders.h" -+#include "vkd3d_shader_utils.h" - - /* ID3D12RootSignature */ - static inline struct d3d12_root_signature *impl_from_ID3D12RootSignature(ID3D12RootSignature *iface) -@@ -374,8 +375,8 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig - - if (unbounded && range->OffsetInDescriptorsFromTableStart == D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) - { -- WARN("An unbounded range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " -- "another unbounded range.\n"); -+ WARN("A range with offset D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND occurs after " -+ "an unbounded range.\n"); - return E_INVALIDARG; - } - -@@ -1958,7 +1959,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device, - - const struct vkd3d_shader_compile_option options[] = - { -- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7}, -+ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_9}, - {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, 
typed_uav_compile_option(device)}, - {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0}, - }; -@@ -1978,14 +1979,14 @@ static HRESULT create_shader_stage(struct d3d12_device *device, - compile_info.next = shader_interface; - compile_info.source.code = code->pShaderBytecode; - compile_info.source.size = code->BytecodeLength; -- compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; - compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; - compile_info.options = options; - compile_info.option_count = ARRAY_SIZE(options); - compile_info.log_level = VKD3D_SHADER_LOG_NONE; - compile_info.source_name = NULL; - -- if ((ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) -+ if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0 -+ || (ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) - { - WARN("Failed to compile shader, vkd3d result %d.\n", ret); - return hresult_from_vkd3d_result(ret); -@@ -2008,10 +2009,11 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER - struct vkd3d_shader_scan_descriptor_info *descriptor_info) - { - struct vkd3d_shader_compile_info compile_info; -+ enum vkd3d_result ret; - - const struct vkd3d_shader_compile_option options[] = - { -- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7}, -+ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_9}, - {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, - }; - -@@ -2019,13 +2021,15 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER - compile_info.next = descriptor_info; - compile_info.source.code = code->pShaderBytecode; - compile_info.source.size = code->BytecodeLength; -- compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; - compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; - compile_info.options = options; - compile_info.option_count = ARRAY_SIZE(options); - 
compile_info.log_level = VKD3D_SHADER_LOG_NONE; - compile_info.source_name = NULL; - -+ if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0) -+ return ret; -+ - return vkd3d_shader_scan(&compile_info, NULL); - } - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -index 88301fbb313..159560afd8e 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -@@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, - - if (!device) - { -- ID3D12Device_Release(&object->ID3D12Device_iface); -+ ID3D12Device_Release(&object->ID3D12Device1_iface); - return S_FALSE; - } - -- return return_interface(&object->ID3D12Device_iface, &IID_ID3D12Device, iid, device); -+ return return_interface(&object->ID3D12Device1_iface, &IID_ID3D12Device, iid, device); - } - - /* ID3D12RootSignatureDeserializer */ -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index 77b795d6278..a18287b4cd4 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -31,8 +31,8 @@ - #include "vkd3d_blob.h" - #include "vkd3d_memory.h" - #include "vkd3d_utf8.h" --#include "wine/list.h" --#include "wine/rbtree.h" -+#include "list.h" -+#include "rbtree.h" - - #include "vkd3d.h" - #include "vkd3d_shader.h" -@@ -44,13 +44,11 @@ - - #define VK_CALL(f) (vk_procs->f) - --#define VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW 0x01000000u -- - #define VKD3D_DESCRIPTOR_MAGIC_FREE 0x00000000u - #define VKD3D_DESCRIPTOR_MAGIC_CBV VKD3D_MAKE_TAG('C', 'B', 'V', 0) --#define VKD3D_DESCRIPTOR_MAGIC_SRV VKD3D_MAKE_TAG('S', 'R', 'V', 1) --#define VKD3D_DESCRIPTOR_MAGIC_UAV VKD3D_MAKE_TAG('U', 'A', 'V', 1) --#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER VKD3D_MAKE_TAG('S', 'M', 'P', 1) -+#define VKD3D_DESCRIPTOR_MAGIC_SRV VKD3D_MAKE_TAG('S', 'R', 'V', 0) -+#define VKD3D_DESCRIPTOR_MAGIC_UAV 
VKD3D_MAKE_TAG('U', 'A', 'V', 0) -+#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER VKD3D_MAKE_TAG('S', 'M', 'P', 0) - #define VKD3D_DESCRIPTOR_MAGIC_DSV VKD3D_MAKE_TAG('D', 'S', 'V', 0) - #define VKD3D_DESCRIPTOR_MAGIC_RTV VKD3D_MAKE_TAG('R', 'T', 'V', 0) - -@@ -149,9 +147,12 @@ struct vkd3d_vulkan_info - unsigned int max_vertex_attrib_divisor; - - VkPhysicalDeviceLimits device_limits; -- VkPhysicalDeviceSparseProperties sparse_properties; - struct vkd3d_device_descriptor_limits descriptor_limits; - -+ VkPhysicalDeviceSparseProperties sparse_properties; -+ bool sparse_binding; -+ bool sparse_residency_3d; -+ - VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; - - unsigned int shader_extension_count; -@@ -252,6 +253,36 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) - { - } - -+static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) -+{ -+ return InterlockedIncrement((LONG volatile *)x); -+} -+ -+static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) -+{ -+ return InterlockedDecrement((LONG volatile *)x); -+} -+ -+static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) -+{ -+ return InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp; -+} -+ -+static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) -+{ -+ return InterlockedExchange((LONG volatile *)x, val); -+} -+ -+static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) -+{ -+ return InterlockedCompareExchangePointer(x, xchg, cmp) == cmp; -+} -+ -+static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) -+{ -+ return InterlockedExchangePointer(x, val); -+} -+ - #else /* _WIN32 */ - - #include -@@ -354,6 +385,72 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) - ERR("Could not destroy the condition variable, error %d.\n", ret); - } - -+# if 
HAVE_SYNC_SUB_AND_FETCH -+static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) -+{ -+ return __sync_sub_and_fetch(x, 1); -+} -+# else -+# error "vkd3d_atomic_decrement() not implemented for this platform" -+# endif /* HAVE_SYNC_SUB_AND_FETCH */ -+ -+# if HAVE_SYNC_ADD_AND_FETCH -+static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x) -+{ -+ return __sync_add_and_fetch(x, 1); -+} -+# else -+# error "vkd3d_atomic_increment() not implemented for this platform" -+# endif /* HAVE_SYNC_ADD_AND_FETCH */ -+ -+# if HAVE_SYNC_BOOL_COMPARE_AND_SWAP -+static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) -+{ -+ return __sync_bool_compare_and_swap(x, cmp, xchg); -+} -+ -+static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) -+{ -+ return __sync_bool_compare_and_swap(x, cmp, xchg); -+} -+# else -+# error "vkd3d_atomic_compare_exchange() not implemented for this platform" -+# endif -+ -+# if HAVE_ATOMIC_EXCHANGE_N -+static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) -+{ -+ return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); -+} -+ -+static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) -+{ -+ return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); -+} -+# elif HAVE_SYNC_BOOL_COMPARE_AND_SWAP -+static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) -+{ -+ unsigned int i; -+ do -+ { -+ i = *x; -+ } while (!__sync_bool_compare_and_swap(x, i, val)); -+ return i; -+} -+ -+static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) -+{ -+ void *p; -+ do -+ { -+ p = *x; -+ } while (!__sync_bool_compare_and_swap(x, p, val)); -+ return p; -+} -+# else -+# error "vkd3d_atomic_exchange() not implemented for this platform" -+# endif -+ - #endif /* _WIN32 */ - - HRESULT vkd3d_create_thread(struct vkd3d_instance *instance, 
-@@ -519,10 +616,12 @@ struct vkd3d_signaled_semaphore - /* ID3D12Fence */ - struct d3d12_fence - { -- ID3D12Fence ID3D12Fence_iface; -+ ID3D12Fence1 ID3D12Fence1_iface; - LONG internal_refcount; - LONG refcount; - -+ D3D12_FENCE_FLAGS flags; -+ - uint64_t value; - uint64_t max_pending_value; - struct vkd3d_mutex mutex; -@@ -563,6 +662,7 @@ struct d3d12_heap - { - ID3D12Heap ID3D12Heap_iface; - LONG refcount; -+ LONG resource_count; - - bool is_private; - D3D12_HEAP_DESC desc; -@@ -589,6 +689,30 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface); - #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 - #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010 - -+struct vkd3d_tiled_region_extent -+{ -+ unsigned int width; -+ unsigned int height; -+ unsigned int depth; -+}; -+ -+struct vkd3d_subresource_tile_info -+{ -+ unsigned int offset; -+ unsigned int count; -+ struct vkd3d_tiled_region_extent extent; -+}; -+ -+struct d3d12_resource_tile_info -+{ -+ VkExtent3D tile_extent; -+ unsigned int total_count; -+ unsigned int standard_mip_count; -+ unsigned int packed_mip_tile_count; -+ unsigned int subresource_count; -+ struct vkd3d_subresource_tile_info *subresources; -+}; -+ - /* ID3D12Resource */ - struct d3d12_resource - { -@@ -617,9 +741,16 @@ struct d3d12_resource - - struct d3d12_device *device; - -+ struct d3d12_resource_tile_info tiles; -+ - struct vkd3d_private_store private_store; - }; - -+static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) -+{ -+ return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); -+} -+ - static inline bool d3d12_resource_is_buffer(const struct d3d12_resource *resource) - { - return resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER; -@@ -632,6 +763,10 @@ static inline bool d3d12_resource_is_texture(const struct d3d12_resource *resour - - bool d3d12_resource_is_cpu_accessible(const struct d3d12_resource *resource); - HRESULT d3d12_resource_validate_desc(const 
D3D12_RESOURCE_DESC *desc, struct d3d12_device *device); -+void d3d12_resource_get_tiling(struct d3d12_device *device, const struct d3d12_resource *resource, -+ UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, -+ UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, -+ D3D12_SUBRESOURCE_TILING *sub_resource_tilings); - - HRESULT d3d12_committed_resource_create(struct d3d12_device *device, - const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, -@@ -661,11 +796,9 @@ enum vkd3d_view_type - VKD3D_VIEW_TYPE_SAMPLER, - }; - --struct vkd3d_view -+struct vkd3d_resource_view - { -- LONG refcount; - enum vkd3d_view_type type; -- uint64_t serial_id; - union - { - VkBufferView vk_buffer_view; -@@ -691,9 +824,6 @@ struct vkd3d_view - } info; - }; - --void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device); --void vkd3d_view_incref(struct vkd3d_view *view); -- - struct vkd3d_texture_view_desc - { - VkImageViewType view_type; -@@ -707,32 +837,89 @@ struct vkd3d_texture_view_desc - bool allowed_swizzle; - }; - --bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, -- VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view); --bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, -+struct vkd3d_desc_header -+{ -+ uint32_t magic; -+ unsigned int volatile refcount; -+ void *next; -+ VkDescriptorType vk_descriptor_type; -+}; -+ -+struct vkd3d_view -+{ -+ struct vkd3d_desc_header h; -+ struct vkd3d_resource_view v; -+}; -+ -+bool vkd3d_create_buffer_view(struct d3d12_device *device, uint32_t magic, VkBuffer vk_buffer, -+ const struct vkd3d_format *format, VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view); -+bool vkd3d_create_texture_view(struct d3d12_device *device, uint32_t magic, VkImage vk_image, - const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view); 
- --struct vkd3d_view_info -+struct vkd3d_cbuffer_desc - { -- uint64_t written_serial_id; -- struct vkd3d_view *view; -+ struct vkd3d_desc_header h; -+ VkDescriptorBufferInfo vk_cbv_info; - }; - - struct d3d12_desc - { - struct - { -- uint32_t magic; -- VkDescriptorType vk_descriptor_type; -- union -+ union d3d12_desc_object - { -- VkDescriptorBufferInfo vk_cbv_info; -- struct vkd3d_view_info view_info; -+ struct vkd3d_desc_header *header; -+ struct vkd3d_view *view; -+ struct vkd3d_cbuffer_desc *cb_desc; -+ void *object; - } u; - } s; - unsigned int index; -+ unsigned int next; - }; - -+void vkd3d_view_decref(void *view, struct d3d12_device *device); -+ -+static inline bool vkd3d_view_incref(void *desc) -+{ -+ struct vkd3d_desc_header *h = desc; -+ unsigned int refcount; -+ -+ do -+ { -+ refcount = h->refcount; -+ /* Avoid incrementing a freed object. Reading the value is safe because objects are recycled. */ -+ if (refcount <= 0) -+ return false; -+ } -+ while (!vkd3d_atomic_compare_exchange(&h->refcount, refcount, refcount + 1)); -+ -+ return true; -+} -+ -+static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc *src, struct d3d12_device *device) -+{ -+ void *view; -+ -+ /* Some games, e.g. Shadow of the Tomb Raider, GRID 2019, and Horizon Zero Dawn, write descriptors -+ * from multiple threads without syncronisation. This is apparently valid in Windows. */ -+ for (;;) -+ { -+ do -+ { -+ if (!(view = src->s.u.object)) -+ return NULL; -+ } while (!vkd3d_view_incref(view)); -+ -+ /* Check if the object is still in src to handle the case where it was -+ * already freed and reused elsewhere when the refcount was incremented. 
*/ -+ if (view == src->s.u.object) -+ return view; -+ -+ vkd3d_view_decref(view, device); -+ } -+} -+ - static inline struct d3d12_desc *d3d12_desc_from_cpu_handle(D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle) - { - return (struct d3d12_desc *)cpu_handle.ptr; -@@ -748,7 +935,10 @@ static inline void d3d12_desc_copy_raw(struct d3d12_desc *dst, const struct d3d1 - dst->s = src->s; - } - --void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); -+struct d3d12_descriptor_heap; -+ -+void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_descriptor_heap *dst_heap, -+ struct d3d12_device *device); - void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, - struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc); - void d3d12_desc_create_srv(struct d3d12_desc *descriptor, -@@ -761,13 +951,12 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device * - void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); - - bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, -- D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view); -+ D3D12_GPU_VIRTUAL_ADDRESS gpu_address, D3D12_ROOT_PARAMETER_TYPE parameter_type, VkBufferView *vk_buffer_view); - HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, - const D3D12_STATIC_SAMPLER_DESC *desc, VkSampler *vk_sampler); - - struct d3d12_rtv_desc - { -- uint32_t magic; - VkSampleCountFlagBits sample_count; - const struct vkd3d_format *format; - uint64_t width; -@@ -787,7 +976,6 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev - - struct d3d12_dsv_desc - { -- uint32_t magic; - VkSampleCountFlagBits sample_count; - const struct vkd3d_format *format; - uint64_t width; -@@ -837,15 +1025,10 @@ struct vkd3d_vk_descriptor_heap_layout - VkDescriptorSetLayout vk_set_layout; - }; - --#define 
VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE 64 -- - struct d3d12_descriptor_heap_vk_set - { - VkDescriptorSet vk_set; -- VkDescriptorBufferInfo vk_buffer_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; -- VkBufferView vk_buffer_views[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; -- VkDescriptorImageInfo vk_image_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; -- VkWriteDescriptorSet vk_descriptor_writes[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; -+ VkDescriptorType vk_type; - }; - - /* ID3D12DescriptorHeap */ -@@ -858,6 +1041,7 @@ struct d3d12_descriptor_heap - D3D12_DESCRIPTOR_HEAP_DESC desc; - - struct d3d12_device *device; -+ bool use_vk_heaps; - - struct vkd3d_private_store private_store; - -@@ -865,9 +1049,13 @@ struct d3d12_descriptor_heap - struct d3d12_descriptor_heap_vk_set vk_descriptor_sets[VKD3D_SET_INDEX_COUNT]; - struct vkd3d_mutex vk_sets_mutex; - -- BYTE descriptors[]; -+ unsigned int volatile dirty_list_head; -+ -+ uint8_t DECLSPEC_ALIGN(sizeof(void *)) descriptors[]; - }; - -+void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device); -+ - static inline struct d3d12_descriptor_heap *d3d12_desc_get_descriptor_heap(const struct d3d12_desc *descriptor) - { - return CONTAINING_RECORD(descriptor - descriptor->index, struct d3d12_descriptor_heap, descriptors); -@@ -882,22 +1070,6 @@ static inline unsigned int d3d12_desc_heap_range_size(const struct d3d12_desc *d - HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, - const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap); - --struct d3d12_desc_copy_location --{ -- struct d3d12_desc src; -- struct d3d12_desc *dst; --}; -- --struct d3d12_desc_copy_info --{ -- unsigned int count; -- bool uav_counter; --}; -- --void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, -- struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, -- struct 
d3d12_device *device); -- - /* ID3D12QueryHeap */ - struct d3d12_query_heap - { -@@ -1254,7 +1426,7 @@ enum vkd3d_pipeline_bind_point - /* ID3D12CommandList */ - struct d3d12_command_list - { -- ID3D12GraphicsCommandList2 ID3D12GraphicsCommandList2_iface; -+ ID3D12GraphicsCommandList3 ID3D12GraphicsCommandList3_iface; - LONG refcount; - - D3D12_COMMAND_LIST_TYPE type; -@@ -1295,6 +1467,8 @@ struct d3d12_command_list - VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT]; - - void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point); -+ struct d3d12_descriptor_heap *descriptor_heaps[64]; -+ unsigned int descriptor_heap_count; - - struct vkd3d_private_store private_store; - }; -@@ -1339,6 +1513,8 @@ enum vkd3d_cs_op - VKD3D_CS_OP_WAIT, - VKD3D_CS_OP_SIGNAL, - VKD3D_CS_OP_EXECUTE, -+ VKD3D_CS_OP_UPDATE_MAPPINGS, -+ VKD3D_CS_OP_COPY_MAPPINGS, - }; - - struct vkd3d_cs_wait -@@ -1359,6 +1535,30 @@ struct vkd3d_cs_execute - unsigned int buffer_count; - }; - -+struct vkd3d_cs_update_mappings -+{ -+ struct d3d12_resource *resource; -+ struct d3d12_heap *heap; -+ D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates; -+ D3D12_TILE_REGION_SIZE *region_sizes; -+ D3D12_TILE_RANGE_FLAGS *range_flags; -+ UINT *heap_range_offsets; -+ UINT *range_tile_counts; -+ UINT region_count; -+ UINT range_count; -+ D3D12_TILE_MAPPING_FLAGS flags; -+}; -+ -+struct vkd3d_cs_copy_mappings -+{ -+ struct d3d12_resource *dst_resource; -+ struct d3d12_resource *src_resource; -+ D3D12_TILED_RESOURCE_COORDINATE dst_region_start_coordinate; -+ D3D12_TILED_RESOURCE_COORDINATE src_region_start_coordinate; -+ D3D12_TILE_REGION_SIZE region_size; -+ D3D12_TILE_MAPPING_FLAGS flags; -+}; -+ - struct vkd3d_cs_op_data - { - enum vkd3d_cs_op opcode; -@@ -1367,6 +1567,8 @@ struct vkd3d_cs_op_data - struct vkd3d_cs_wait wait; - struct vkd3d_cs_signal signal; - struct vkd3d_cs_execute execute; -+ struct vkd3d_cs_update_mappings update_mappings; -+ 
struct vkd3d_cs_copy_mappings copy_mappings; - } u; - }; - -@@ -1404,6 +1606,8 @@ struct d3d12_command_queue - * set, aux_op_queue.count must be zero. */ - struct d3d12_command_queue_op_array aux_op_queue; - -+ bool supports_sparse_binding; -+ - struct vkd3d_private_store private_store; - }; - -@@ -1415,6 +1619,7 @@ struct d3d12_command_signature - { - ID3D12CommandSignature ID3D12CommandSignature_iface; - LONG refcount; -+ unsigned int internal_refcount; - - D3D12_COMMAND_SIGNATURE_DESC desc; - -@@ -1485,12 +1690,26 @@ struct vkd3d_uav_clear_state - HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); - void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); - -+struct desc_object_cache_head -+{ -+ void *head; -+ unsigned int spinlock; -+}; -+ -+struct vkd3d_desc_object_cache -+{ -+ struct desc_object_cache_head heads[16]; -+ unsigned int next_index; -+ unsigned int free_count; -+ size_t size; -+}; -+ - #define VKD3D_DESCRIPTOR_POOL_COUNT 6 - - /* ID3D12Device */ - struct d3d12_device - { -- ID3D12Device ID3D12Device_iface; -+ ID3D12Device1 ID3D12Device1_iface; - LONG refcount; - - VkDevice vk_device; -@@ -1502,7 +1721,8 @@ struct d3d12_device - struct vkd3d_gpu_va_allocator gpu_va_allocator; - - struct vkd3d_mutex mutex; -- struct vkd3d_mutex desc_mutex[8]; -+ struct vkd3d_desc_object_cache view_desc_cache; -+ struct vkd3d_desc_object_cache cbuffer_desc_cache; - struct vkd3d_render_pass_cache render_pass_cache; - VkPipelineCache vk_pipeline_cache; - -@@ -1544,6 +1764,7 @@ struct d3d12_device - struct vkd3d_uav_clear_state uav_clear_state; - - VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; -+ unsigned int vk_pool_count; - struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; - bool use_vk_heaps; - }; -@@ -1554,40 +1775,27 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 - bool 
d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); - void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, - const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); --struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface); -+struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface); - - static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) - { -- return ID3D12Device_QueryInterface(&device->ID3D12Device_iface, iid, object); -+ return ID3D12Device1_QueryInterface(&device->ID3D12Device1_iface, iid, object); - } - - static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) - { -- return ID3D12Device_AddRef(&device->ID3D12Device_iface); -+ return ID3D12Device1_AddRef(&device->ID3D12Device1_iface); - } - - static inline ULONG d3d12_device_release(struct d3d12_device *device) - { -- return ID3D12Device_Release(&device->ID3D12Device_iface); -+ return ID3D12Device1_Release(&device->ID3D12Device1_iface); - } - - static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, - D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) - { -- return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); --} -- --static inline struct vkd3d_mutex *d3d12_device_get_descriptor_mutex(struct d3d12_device *device, -- const struct d3d12_desc *descriptor) --{ -- STATIC_ASSERT(!(ARRAY_SIZE(device->desc_mutex) & (ARRAY_SIZE(device->desc_mutex) - 1))); -- uintptr_t idx = (uintptr_t)descriptor; -- -- idx ^= idx >> 12; -- idx ^= idx >> 6; -- idx ^= idx >> 3; -- -- return &device->desc_mutex[idx & (ARRAY_SIZE(device->desc_mutex) - 1)]; -+ return ID3D12Device1_GetDescriptorHandleIncrementSize(&device->ID3D12Device1_iface, descriptor_type); - } - - /* utils */ --- -2.40.1 - diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-ca05e57e67306e9b97eb22a35cd77728e3e.patch 
b/patches/vkd3d-latest/0001-Updated-vkd3d-to-ca05e57e67306e9b97eb22a35cd77728e3e.patch new file mode 100644 index 00000000..e7af90d0 --- /dev/null +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-ca05e57e67306e9b97eb22a35cd77728e3e.patch @@ -0,0 +1,3862 @@ +From 693aa55e7a12e42906f9e20049f9856915013450 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Wed, 17 May 2023 08:35:40 +1000 +Subject: [PATCH] Updated vkd3d to ca05e57e67306e9b97eb22a35cd77728e3e91db9 + +--- + libs/vkd3d/include/list.h | 270 +++++++++++ + libs/vkd3d/include/private/list.h | 270 +++++++++++ + libs/vkd3d/include/private/rbtree.h | 378 +++++++++++++++ + libs/vkd3d/include/private/vkd3d_common.h | 2 +- + libs/vkd3d/include/private/vkd3d_test.h | 432 ++++++++++++++++++ + libs/vkd3d/include/vkd3d_d3d9types.h | 237 ++++++++++ + libs/vkd3d/include/vkd3d_d3dcompiler.h | 74 +++ + libs/vkd3d/include/vkd3d_utils.h | 108 +++++ + libs/vkd3d/include/vkd3d_windows.h | 289 ++++++++++++ + libs/vkd3d/libs/vkd3d-common/blob.c | 1 + + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 2 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 30 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 10 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 2 +- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 35 +- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 187 +++++++- + libs/vkd3d/libs/vkd3d-shader/ir.c | 150 +++++- + libs/vkd3d/libs/vkd3d-shader/preproc.h | 2 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 2 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 14 +- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 22 + + .../libs/vkd3d-shader/vkd3d_shader_private.h | 35 +- + libs/vkd3d/libs/vkd3d/command.c | 1 + + libs/vkd3d/libs/vkd3d/device.c | 199 ++++---- + libs/vkd3d/libs/vkd3d/resource.c | 2 +- + libs/vkd3d/libs/vkd3d/vkd3d_main.c | 4 +- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 16 +- + 27 files changed, 2597 insertions(+), 177 deletions(-) + create mode 100644 libs/vkd3d/include/list.h + create mode 100644 libs/vkd3d/include/private/list.h + create mode 
100644 libs/vkd3d/include/private/rbtree.h + create mode 100644 libs/vkd3d/include/private/vkd3d_test.h + create mode 100644 libs/vkd3d/include/vkd3d_d3d9types.h + create mode 100644 libs/vkd3d/include/vkd3d_d3dcompiler.h + create mode 100644 libs/vkd3d/include/vkd3d_utils.h + create mode 100644 libs/vkd3d/include/vkd3d_windows.h + +diff --git a/libs/vkd3d/include/list.h b/libs/vkd3d/include/list.h +new file mode 100644 +index 00000000000..2e1d95f3fd4 +--- /dev/null ++++ b/libs/vkd3d/include/list.h +@@ -0,0 +1,270 @@ ++/* ++ * Linked lists support ++ * ++ * Copyright (C) 2002 Alexandre Julliard ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __WINE_SERVER_LIST_H ++#define __WINE_SERVER_LIST_H ++ ++#include ++ ++struct list ++{ ++ struct list *next; ++ struct list *prev; ++}; ++ ++/* Define a list like so: ++ * ++ * struct gadget ++ * { ++ * struct list entry; <-- doesn't have to be the first item in the struct ++ * int a, b; ++ * }; ++ * ++ * static struct list global_gadgets = LIST_INIT( global_gadgets ); ++ * ++ * or ++ * ++ * struct some_global_thing ++ * { ++ * struct list gadgets; ++ * }; ++ * ++ * list_init( &some_global_thing->gadgets ); ++ * ++ * Manipulate it like this: ++ * ++ * list_add_head( &global_gadgets, &new_gadget->entry ); ++ * list_remove( &new_gadget->entry ); ++ * list_add_after( &some_random_gadget->entry, &new_gadget->entry ); ++ * ++ * And to iterate over it: ++ * ++ * struct gadget *gadget; ++ * LIST_FOR_EACH_ENTRY( gadget, &global_gadgets, struct gadget, entry ) ++ * { ++ * ... 
++ * } ++ * ++ */ ++ ++/* add an element after the specified one */ ++static inline void list_add_after( struct list *elem, struct list *to_add ) ++{ ++ to_add->next = elem->next; ++ to_add->prev = elem; ++ elem->next->prev = to_add; ++ elem->next = to_add; ++} ++ ++/* add an element before the specified one */ ++static inline void list_add_before( struct list *elem, struct list *to_add ) ++{ ++ to_add->next = elem; ++ to_add->prev = elem->prev; ++ elem->prev->next = to_add; ++ elem->prev = to_add; ++} ++ ++/* add element at the head of the list */ ++static inline void list_add_head( struct list *list, struct list *elem ) ++{ ++ list_add_after( list, elem ); ++} ++ ++/* add element at the tail of the list */ ++static inline void list_add_tail( struct list *list, struct list *elem ) ++{ ++ list_add_before( list, elem ); ++} ++ ++/* remove an element from its list */ ++static inline void list_remove( struct list *elem ) ++{ ++ elem->next->prev = elem->prev; ++ elem->prev->next = elem->next; ++} ++ ++/* get the next element */ ++static inline struct list *list_next( const struct list *list, const struct list *elem ) ++{ ++ struct list *ret = elem->next; ++ if (elem->next == list) ret = NULL; ++ return ret; ++} ++ ++/* get the previous element */ ++static inline struct list *list_prev( const struct list *list, const struct list *elem ) ++{ ++ struct list *ret = elem->prev; ++ if (elem->prev == list) ret = NULL; ++ return ret; ++} ++ ++/* get the first element */ ++static inline struct list *list_head( const struct list *list ) ++{ ++ return list_next( list, list ); ++} ++ ++/* get the last element */ ++static inline struct list *list_tail( const struct list *list ) ++{ ++ return list_prev( list, list ); ++} ++ ++/* check if a list is empty */ ++static inline int list_empty( const struct list *list ) ++{ ++ return list->next == list; ++} ++ ++/* initialize a list */ ++static inline void list_init( struct list *list ) ++{ ++ list->next = list->prev = list; ++} ++ ++/* 
count the elements of a list */ ++static inline unsigned int list_count( const struct list *list ) ++{ ++ unsigned count = 0; ++ const struct list *ptr; ++ for (ptr = list->next; ptr != list; ptr = ptr->next) count++; ++ return count; ++} ++ ++/* move all elements from src to before the specified element */ ++static inline void list_move_before( struct list *dst, struct list *src ) ++{ ++ if (list_empty(src)) return; ++ ++ dst->prev->next = src->next; ++ src->next->prev = dst->prev; ++ dst->prev = src->prev; ++ src->prev->next = dst; ++ list_init(src); ++} ++ ++/* move all elements from src to after the specified element */ ++static inline void list_move_after( struct list *dst, struct list *src ) ++{ ++ if (list_empty(src)) return; ++ ++ dst->next->prev = src->prev; ++ src->prev->next = dst->next; ++ dst->next = src->next; ++ src->next->prev = dst; ++ list_init(src); ++} ++ ++/* move all elements from src to the head of dst */ ++static inline void list_move_head( struct list *dst, struct list *src ) ++{ ++ list_move_after( dst, src ); ++} ++ ++/* move all elements from src to the tail of dst */ ++static inline void list_move_tail( struct list *dst, struct list *src ) ++{ ++ list_move_before( dst, src ); ++} ++ ++/* move the slice of elements from begin to end inclusive to the head of dst */ ++static inline void list_move_slice_head( struct list *dst, struct list *begin, struct list *end ) ++{ ++ struct list *dst_next = dst->next; ++ begin->prev->next = end->next; ++ end->next->prev = begin->prev; ++ dst->next = begin; ++ dst_next->prev = end; ++ begin->prev = dst; ++ end->next = dst_next; ++} ++ ++/* move the slice of elements from begin to end inclusive to the tail of dst */ ++static inline void list_move_slice_tail( struct list *dst, struct list *begin, struct list *end ) ++{ ++ struct list *dst_prev = dst->prev; ++ begin->prev->next = end->next; ++ end->next->prev = begin->prev; ++ dst_prev->next = begin; ++ dst->prev = end; ++ begin->prev = dst_prev; ++ 
end->next = dst; ++} ++ ++/* iterate through the list */ ++#define LIST_FOR_EACH(cursor,list) \ ++ for ((cursor) = (list)->next; (cursor) != (list); (cursor) = (cursor)->next) ++ ++/* iterate through the list, with safety against removal */ ++#define LIST_FOR_EACH_SAFE(cursor, cursor2, list) \ ++ for ((cursor) = (list)->next, (cursor2) = (cursor)->next; \ ++ (cursor) != (list); \ ++ (cursor) = (cursor2), (cursor2) = (cursor)->next) ++ ++/* iterate through the list using a list entry */ ++#define LIST_FOR_EACH_ENTRY(elem, list, type, field) \ ++ for ((elem) = LIST_ENTRY((list)->next, type, field); \ ++ &(elem)->field != (list); \ ++ (elem) = LIST_ENTRY((elem)->field.next, type, field)) ++ ++/* iterate through the list using a list entry, with safety against removal */ ++#define LIST_FOR_EACH_ENTRY_SAFE(cursor, cursor2, list, type, field) \ ++ for ((cursor) = LIST_ENTRY((list)->next, type, field), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.next, type, field); \ ++ &(cursor)->field != (list); \ ++ (cursor) = (cursor2), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.next, type, field)) ++ ++/* iterate through the list in reverse order */ ++#define LIST_FOR_EACH_REV(cursor,list) \ ++ for ((cursor) = (list)->prev; (cursor) != (list); (cursor) = (cursor)->prev) ++ ++/* iterate through the list in reverse order, with safety against removal */ ++#define LIST_FOR_EACH_SAFE_REV(cursor, cursor2, list) \ ++ for ((cursor) = (list)->prev, (cursor2) = (cursor)->prev; \ ++ (cursor) != (list); \ ++ (cursor) = (cursor2), (cursor2) = (cursor)->prev) ++ ++/* iterate through the list in reverse order using a list entry */ ++#define LIST_FOR_EACH_ENTRY_REV(elem, list, type, field) \ ++ for ((elem) = LIST_ENTRY((list)->prev, type, field); \ ++ &(elem)->field != (list); \ ++ (elem) = LIST_ENTRY((elem)->field.prev, type, field)) ++ ++/* iterate through the list in reverse order using a list entry, with safety against removal */ ++#define LIST_FOR_EACH_ENTRY_SAFE_REV(cursor, cursor2, list, 
type, field) \ ++ for ((cursor) = LIST_ENTRY((list)->prev, type, field), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field); \ ++ &(cursor)->field != (list); \ ++ (cursor) = (cursor2), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field)) ++ ++/* macros for statically initialized lists */ ++#undef LIST_INIT ++#define LIST_INIT(list) { &(list), &(list) } ++ ++/* get pointer to object containing list element */ ++#undef LIST_ENTRY ++#define LIST_ENTRY(elem, type, field) \ ++ ((type *)((char *)(elem) - offsetof(type, field))) ++ ++#endif /* __WINE_SERVER_LIST_H */ +diff --git a/libs/vkd3d/include/private/list.h b/libs/vkd3d/include/private/list.h +new file mode 100644 +index 00000000000..2e1d95f3fd4 +--- /dev/null ++++ b/libs/vkd3d/include/private/list.h +@@ -0,0 +1,270 @@ ++/* ++ * Linked lists support ++ * ++ * Copyright (C) 2002 Alexandre Julliard ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __WINE_SERVER_LIST_H ++#define __WINE_SERVER_LIST_H ++ ++#include ++ ++struct list ++{ ++ struct list *next; ++ struct list *prev; ++}; ++ ++/* Define a list like so: ++ * ++ * struct gadget ++ * { ++ * struct list entry; <-- doesn't have to be the first item in the struct ++ * int a, b; ++ * }; ++ * ++ * static struct list global_gadgets = LIST_INIT( global_gadgets ); ++ * ++ * or ++ * ++ * struct some_global_thing ++ * { ++ * struct list gadgets; ++ * }; ++ * ++ * list_init( &some_global_thing->gadgets ); ++ * ++ * Manipulate it like this: ++ * ++ * list_add_head( &global_gadgets, &new_gadget->entry ); ++ * list_remove( &new_gadget->entry ); ++ * list_add_after( &some_random_gadget->entry, &new_gadget->entry ); ++ * ++ * And to iterate over it: ++ * ++ * struct gadget *gadget; ++ * LIST_FOR_EACH_ENTRY( gadget, &global_gadgets, struct gadget, entry ) ++ * { ++ * ... 
++ * } ++ * ++ */ ++ ++/* add an element after the specified one */ ++static inline void list_add_after( struct list *elem, struct list *to_add ) ++{ ++ to_add->next = elem->next; ++ to_add->prev = elem; ++ elem->next->prev = to_add; ++ elem->next = to_add; ++} ++ ++/* add an element before the specified one */ ++static inline void list_add_before( struct list *elem, struct list *to_add ) ++{ ++ to_add->next = elem; ++ to_add->prev = elem->prev; ++ elem->prev->next = to_add; ++ elem->prev = to_add; ++} ++ ++/* add element at the head of the list */ ++static inline void list_add_head( struct list *list, struct list *elem ) ++{ ++ list_add_after( list, elem ); ++} ++ ++/* add element at the tail of the list */ ++static inline void list_add_tail( struct list *list, struct list *elem ) ++{ ++ list_add_before( list, elem ); ++} ++ ++/* remove an element from its list */ ++static inline void list_remove( struct list *elem ) ++{ ++ elem->next->prev = elem->prev; ++ elem->prev->next = elem->next; ++} ++ ++/* get the next element */ ++static inline struct list *list_next( const struct list *list, const struct list *elem ) ++{ ++ struct list *ret = elem->next; ++ if (elem->next == list) ret = NULL; ++ return ret; ++} ++ ++/* get the previous element */ ++static inline struct list *list_prev( const struct list *list, const struct list *elem ) ++{ ++ struct list *ret = elem->prev; ++ if (elem->prev == list) ret = NULL; ++ return ret; ++} ++ ++/* get the first element */ ++static inline struct list *list_head( const struct list *list ) ++{ ++ return list_next( list, list ); ++} ++ ++/* get the last element */ ++static inline struct list *list_tail( const struct list *list ) ++{ ++ return list_prev( list, list ); ++} ++ ++/* check if a list is empty */ ++static inline int list_empty( const struct list *list ) ++{ ++ return list->next == list; ++} ++ ++/* initialize a list */ ++static inline void list_init( struct list *list ) ++{ ++ list->next = list->prev = list; ++} ++ ++/* 
count the elements of a list */ ++static inline unsigned int list_count( const struct list *list ) ++{ ++ unsigned count = 0; ++ const struct list *ptr; ++ for (ptr = list->next; ptr != list; ptr = ptr->next) count++; ++ return count; ++} ++ ++/* move all elements from src to before the specified element */ ++static inline void list_move_before( struct list *dst, struct list *src ) ++{ ++ if (list_empty(src)) return; ++ ++ dst->prev->next = src->next; ++ src->next->prev = dst->prev; ++ dst->prev = src->prev; ++ src->prev->next = dst; ++ list_init(src); ++} ++ ++/* move all elements from src to after the specified element */ ++static inline void list_move_after( struct list *dst, struct list *src ) ++{ ++ if (list_empty(src)) return; ++ ++ dst->next->prev = src->prev; ++ src->prev->next = dst->next; ++ dst->next = src->next; ++ src->next->prev = dst; ++ list_init(src); ++} ++ ++/* move all elements from src to the head of dst */ ++static inline void list_move_head( struct list *dst, struct list *src ) ++{ ++ list_move_after( dst, src ); ++} ++ ++/* move all elements from src to the tail of dst */ ++static inline void list_move_tail( struct list *dst, struct list *src ) ++{ ++ list_move_before( dst, src ); ++} ++ ++/* move the slice of elements from begin to end inclusive to the head of dst */ ++static inline void list_move_slice_head( struct list *dst, struct list *begin, struct list *end ) ++{ ++ struct list *dst_next = dst->next; ++ begin->prev->next = end->next; ++ end->next->prev = begin->prev; ++ dst->next = begin; ++ dst_next->prev = end; ++ begin->prev = dst; ++ end->next = dst_next; ++} ++ ++/* move the slice of elements from begin to end inclusive to the tail of dst */ ++static inline void list_move_slice_tail( struct list *dst, struct list *begin, struct list *end ) ++{ ++ struct list *dst_prev = dst->prev; ++ begin->prev->next = end->next; ++ end->next->prev = begin->prev; ++ dst_prev->next = begin; ++ dst->prev = end; ++ begin->prev = dst_prev; ++ 
end->next = dst; ++} ++ ++/* iterate through the list */ ++#define LIST_FOR_EACH(cursor,list) \ ++ for ((cursor) = (list)->next; (cursor) != (list); (cursor) = (cursor)->next) ++ ++/* iterate through the list, with safety against removal */ ++#define LIST_FOR_EACH_SAFE(cursor, cursor2, list) \ ++ for ((cursor) = (list)->next, (cursor2) = (cursor)->next; \ ++ (cursor) != (list); \ ++ (cursor) = (cursor2), (cursor2) = (cursor)->next) ++ ++/* iterate through the list using a list entry */ ++#define LIST_FOR_EACH_ENTRY(elem, list, type, field) \ ++ for ((elem) = LIST_ENTRY((list)->next, type, field); \ ++ &(elem)->field != (list); \ ++ (elem) = LIST_ENTRY((elem)->field.next, type, field)) ++ ++/* iterate through the list using a list entry, with safety against removal */ ++#define LIST_FOR_EACH_ENTRY_SAFE(cursor, cursor2, list, type, field) \ ++ for ((cursor) = LIST_ENTRY((list)->next, type, field), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.next, type, field); \ ++ &(cursor)->field != (list); \ ++ (cursor) = (cursor2), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.next, type, field)) ++ ++/* iterate through the list in reverse order */ ++#define LIST_FOR_EACH_REV(cursor,list) \ ++ for ((cursor) = (list)->prev; (cursor) != (list); (cursor) = (cursor)->prev) ++ ++/* iterate through the list in reverse order, with safety against removal */ ++#define LIST_FOR_EACH_SAFE_REV(cursor, cursor2, list) \ ++ for ((cursor) = (list)->prev, (cursor2) = (cursor)->prev; \ ++ (cursor) != (list); \ ++ (cursor) = (cursor2), (cursor2) = (cursor)->prev) ++ ++/* iterate through the list in reverse order using a list entry */ ++#define LIST_FOR_EACH_ENTRY_REV(elem, list, type, field) \ ++ for ((elem) = LIST_ENTRY((list)->prev, type, field); \ ++ &(elem)->field != (list); \ ++ (elem) = LIST_ENTRY((elem)->field.prev, type, field)) ++ ++/* iterate through the list in reverse order using a list entry, with safety against removal */ ++#define LIST_FOR_EACH_ENTRY_SAFE_REV(cursor, cursor2, list, 
type, field) \ ++ for ((cursor) = LIST_ENTRY((list)->prev, type, field), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field); \ ++ &(cursor)->field != (list); \ ++ (cursor) = (cursor2), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field)) ++ ++/* macros for statically initialized lists */ ++#undef LIST_INIT ++#define LIST_INIT(list) { &(list), &(list) } ++ ++/* get pointer to object containing list element */ ++#undef LIST_ENTRY ++#define LIST_ENTRY(elem, type, field) \ ++ ((type *)((char *)(elem) - offsetof(type, field))) ++ ++#endif /* __WINE_SERVER_LIST_H */ +diff --git a/libs/vkd3d/include/private/rbtree.h b/libs/vkd3d/include/private/rbtree.h +new file mode 100644 +index 00000000000..b5d38bca54c +--- /dev/null ++++ b/libs/vkd3d/include/private/rbtree.h +@@ -0,0 +1,378 @@ ++/* ++ * Red-black search tree support ++ * ++ * Copyright 2009 Henri Verbeet ++ * Copyright 2009 Andrew Riedi ++ * Copyright 2016 Jacek Caban for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __WINE_WINE_RBTREE_H ++#define __WINE_WINE_RBTREE_H ++ ++#define RB_ENTRY_VALUE(element, type, field) \ ++ ((type *)((char *)(element) - offsetof(type, field))) ++ ++struct rb_entry ++{ ++ struct rb_entry *parent; ++ struct rb_entry *left; ++ struct rb_entry *right; ++ unsigned int flags; ++}; ++ ++typedef int (*rb_compare_func)(const void *key, const struct rb_entry *entry); ++ ++struct rb_tree ++{ ++ rb_compare_func compare; ++ struct rb_entry *root; ++}; ++ ++typedef void (rb_traverse_func)(struct rb_entry *entry, void *context); ++ ++#define RB_FLAG_RED 0x1 ++ ++static inline int rb_is_red(struct rb_entry *entry) ++{ ++ return entry && (entry->flags & RB_FLAG_RED); ++} ++ ++static inline void rb_rotate_left(struct rb_tree *tree, struct rb_entry *e) ++{ ++ struct rb_entry *right = e->right; ++ ++ if (!e->parent) ++ tree->root = right; ++ else if (e->parent->left == e) ++ e->parent->left = right; ++ else ++ e->parent->right = right; ++ ++ e->right = right->left; ++ if (e->right) e->right->parent = e; ++ right->left = e; ++ right->parent = e->parent; ++ e->parent = right; ++} ++ ++static inline void rb_rotate_right(struct rb_tree *tree, struct rb_entry *e) ++{ ++ struct rb_entry *left = e->left; ++ ++ if (!e->parent) ++ tree->root = left; ++ else if (e->parent->left == e) ++ e->parent->left = left; ++ else ++ e->parent->right = left; ++ ++ e->left = left->right; ++ if (e->left) e->left->parent = e; ++ left->right = e; ++ left->parent = e->parent; ++ e->parent = left; ++} ++ ++static inline void rb_flip_color(struct rb_entry *entry) ++{ ++ entry->flags ^= RB_FLAG_RED; ++ entry->left->flags ^= RB_FLAG_RED; ++ entry->right->flags ^= RB_FLAG_RED; ++} ++ ++static inline struct rb_entry *rb_head(struct rb_entry *iter) ++{ ++ if 
(!iter) return NULL; ++ while (iter->left) iter = iter->left; ++ return iter; ++} ++ ++static inline struct rb_entry *rb_next(struct rb_entry *iter) ++{ ++ if (iter->right) return rb_head(iter->right); ++ while (iter->parent && iter->parent->right == iter) iter = iter->parent; ++ return iter->parent; ++} ++ ++static inline struct rb_entry *rb_postorder_head(struct rb_entry *iter) ++{ ++ if (!iter) return NULL; ++ ++ for (;;) { ++ while (iter->left) iter = iter->left; ++ if (!iter->right) return iter; ++ iter = iter->right; ++ } ++} ++ ++static inline struct rb_entry *rb_postorder_next(struct rb_entry *iter) ++{ ++ if (!iter->parent) return NULL; ++ if (iter == iter->parent->right || !iter->parent->right) return iter->parent; ++ return rb_postorder_head(iter->parent->right); ++} ++ ++/* iterate through the tree */ ++#define RB_FOR_EACH(cursor, tree) \ ++ for ((cursor) = rb_head((tree)->root); (cursor); (cursor) = rb_next(cursor)) ++ ++/* iterate through the tree using a tree entry */ ++#define RB_FOR_EACH_ENTRY(elem, tree, type, field) \ ++ for ((elem) = RB_ENTRY_VALUE(rb_head((tree)->root), type, field); \ ++ (elem) != RB_ENTRY_VALUE(0, type, field); \ ++ (elem) = RB_ENTRY_VALUE(rb_next(&elem->field), type, field)) ++ ++/* iterate through the tree using using postorder, making it safe to free the entry */ ++#define RB_FOR_EACH_DESTRUCTOR(cursor, cursor2, tree) \ ++ for ((cursor) = rb_postorder_head((tree)->root); \ ++ (cursor) && (((cursor2) = rb_postorder_next(cursor)) || 1); \ ++ (cursor) = (cursor2)) ++ ++/* iterate through the tree using a tree entry and postorder, making it safe to free the entry */ ++#define RB_FOR_EACH_ENTRY_DESTRUCTOR(elem, elem2, tree, type, field) \ ++ for ((elem) = RB_ENTRY_VALUE(rb_postorder_head((tree)->root), type, field); \ ++ (elem) != WINE_RB_ENTRY_VALUE(0, type, field) \ ++ && (((elem2) = RB_ENTRY_VALUE(rb_postorder_next(&(elem)->field), type, field)) || 1); \ ++ (elem) = (elem2)) ++ ++ ++static inline void rb_postorder(struct 
rb_tree *tree, rb_traverse_func *callback, void *context) ++{ ++ struct rb_entry *iter, *next; ++ RB_FOR_EACH_DESTRUCTOR(iter, next, tree) callback(iter, context); ++} ++ ++static inline void rb_init(struct rb_tree *tree, rb_compare_func compare) ++{ ++ tree->compare = compare; ++ tree->root = NULL; ++} ++ ++static inline void rb_for_each_entry(struct rb_tree *tree, rb_traverse_func *callback, void *context) ++{ ++ struct rb_entry *iter; ++ RB_FOR_EACH(iter, tree) callback(iter, context); ++} ++ ++static inline void rb_clear(struct rb_tree *tree, rb_traverse_func *callback, void *context) ++{ ++ /* Note that we use postorder here because the callback will likely free the entry. */ ++ if (callback) rb_postorder(tree, callback, context); ++ tree->root = NULL; ++} ++ ++static inline void rb_destroy(struct rb_tree *tree, rb_traverse_func *callback, void *context) ++{ ++ rb_clear(tree, callback, context); ++} ++ ++static inline struct rb_entry *rb_get(const struct rb_tree *tree, const void *key) ++{ ++ struct rb_entry *entry = tree->root; ++ while (entry) ++ { ++ int c = tree->compare(key, entry); ++ if (!c) return entry; ++ entry = c < 0 ? 
entry->left : entry->right; ++ } ++ return NULL; ++} ++ ++static inline int rb_put(struct rb_tree *tree, const void *key, struct rb_entry *entry) ++{ ++ struct rb_entry **iter = &tree->root, *parent = tree->root; ++ ++ while (*iter) ++ { ++ int c; ++ ++ parent = *iter; ++ c = tree->compare(key, parent); ++ if (!c) return -1; ++ else if (c < 0) iter = &parent->left; ++ else iter = &parent->right; ++ } ++ ++ entry->flags = RB_FLAG_RED; ++ entry->parent = parent; ++ entry->left = NULL; ++ entry->right = NULL; ++ *iter = entry; ++ ++ while (rb_is_red(entry->parent)) ++ { ++ if (entry->parent == entry->parent->parent->left) ++ { ++ if (rb_is_red(entry->parent->parent->right)) ++ { ++ rb_flip_color(entry->parent->parent); ++ entry = entry->parent->parent; ++ } ++ else ++ { ++ if (entry == entry->parent->right) ++ { ++ entry = entry->parent; ++ rb_rotate_left(tree, entry); ++ } ++ entry->parent->flags &= ~RB_FLAG_RED; ++ entry->parent->parent->flags |= RB_FLAG_RED; ++ rb_rotate_right(tree, entry->parent->parent); ++ } ++ } ++ else ++ { ++ if (rb_is_red(entry->parent->parent->left)) ++ { ++ rb_flip_color(entry->parent->parent); ++ entry = entry->parent->parent; ++ } ++ else ++ { ++ if (entry == entry->parent->left) ++ { ++ entry = entry->parent; ++ rb_rotate_right(tree, entry); ++ } ++ entry->parent->flags &= ~RB_FLAG_RED; ++ entry->parent->parent->flags |= RB_FLAG_RED; ++ rb_rotate_left(tree, entry->parent->parent); ++ } ++ } ++ } ++ ++ tree->root->flags &= ~RB_FLAG_RED; ++ ++ return 0; ++} ++ ++static inline void rb_remove(struct rb_tree *tree, struct rb_entry *entry) ++{ ++ struct rb_entry *iter, *child, *parent, *w; ++ int need_fixup; ++ ++ if (entry->right && entry->left) ++ for(iter = entry->right; iter->left; iter = iter->left); ++ else ++ iter = entry; ++ ++ child = iter->left ? 
iter->left : iter->right; ++ ++ if (!iter->parent) ++ tree->root = child; ++ else if (iter == iter->parent->left) ++ iter->parent->left = child; ++ else ++ iter->parent->right = child; ++ ++ if (child) child->parent = iter->parent; ++ parent = iter->parent; ++ ++ need_fixup = !rb_is_red(iter); ++ ++ if (entry != iter) ++ { ++ *iter = *entry; ++ if (!iter->parent) ++ tree->root = iter; ++ else if (entry == iter->parent->left) ++ iter->parent->left = iter; ++ else ++ iter->parent->right = iter; ++ ++ if (iter->right) iter->right->parent = iter; ++ if (iter->left) iter->left->parent = iter; ++ if (parent == entry) parent = iter; ++ } ++ ++ if (need_fixup) ++ { ++ while (parent && !rb_is_red(child)) ++ { ++ if (child == parent->left) ++ { ++ w = parent->right; ++ if (rb_is_red(w)) ++ { ++ w->flags &= ~RB_FLAG_RED; ++ parent->flags |= RB_FLAG_RED; ++ rb_rotate_left(tree, parent); ++ w = parent->right; ++ } ++ if (rb_is_red(w->left) || rb_is_red(w->right)) ++ { ++ if (!rb_is_red(w->right)) ++ { ++ w->left->flags &= ~RB_FLAG_RED; ++ w->flags |= RB_FLAG_RED; ++ rb_rotate_right(tree, w); ++ w = parent->right; ++ } ++ w->flags = (w->flags & ~RB_FLAG_RED) | (parent->flags & RB_FLAG_RED); ++ parent->flags &= ~RB_FLAG_RED; ++ if (w->right) ++ w->right->flags &= ~RB_FLAG_RED; ++ rb_rotate_left(tree, parent); ++ child = NULL; ++ break; ++ } ++ } ++ else ++ { ++ w = parent->left; ++ if (rb_is_red(w)) ++ { ++ w->flags &= ~RB_FLAG_RED; ++ parent->flags |= RB_FLAG_RED; ++ rb_rotate_right(tree, parent); ++ w = parent->left; ++ } ++ if (rb_is_red(w->left) || rb_is_red(w->right)) ++ { ++ if (!rb_is_red(w->left)) ++ { ++ w->right->flags &= ~RB_FLAG_RED; ++ w->flags |= RB_FLAG_RED; ++ rb_rotate_left(tree, w); ++ w = parent->left; ++ } ++ w->flags = (w->flags & ~RB_FLAG_RED) | (parent->flags & RB_FLAG_RED); ++ parent->flags &= ~RB_FLAG_RED; ++ if (w->left) ++ w->left->flags &= ~RB_FLAG_RED; ++ rb_rotate_right(tree, parent); ++ child = NULL; ++ break; ++ } ++ } ++ w->flags |= RB_FLAG_RED; 
++ child = parent; ++ parent = child->parent; ++ } ++ if (child) child->flags &= ~RB_FLAG_RED; ++ } ++ ++ if (tree->root) tree->root->flags &= ~RB_FLAG_RED; ++} ++ ++static inline void rb_remove_key(struct rb_tree *tree, const void *key) ++{ ++ struct rb_entry *entry = rb_get(tree, key); ++ if (entry) rb_remove(tree, entry); ++} ++ ++#endif /* __WINE_WINE_RBTREE_H */ +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index ee733ee0d76..f7d98f327f1 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -86,7 +86,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) + { + #ifdef _MSC_VER + return __popcnt(v); +-#elif defined(__MINGW32__) ++#elif defined(HAVE_BUILTIN_POPCOUNT) + return __builtin_popcount(v); + #else + v -= (v >> 1) & 0x55555555; +diff --git a/libs/vkd3d/include/private/vkd3d_test.h b/libs/vkd3d/include/private/vkd3d_test.h +new file mode 100644 +index 00000000000..081443c4fa6 +--- /dev/null ++++ b/libs/vkd3d/include/private/vkd3d_test.h +@@ -0,0 +1,432 @@ ++/* ++ * Copyright 2016 Józef Kucia for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __VKD3D_TEST_H ++#define __VKD3D_TEST_H ++ ++#include "vkd3d_common.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern const char *vkd3d_test_name; ++extern const char *vkd3d_test_platform; ++ ++static void vkd3d_test_start_todo(bool is_todo); ++static int vkd3d_test_loop_todo(void); ++static void vkd3d_test_end_todo(void); ++ ++#define START_TEST(name) \ ++ const char *vkd3d_test_name = #name; \ ++ static void vkd3d_test_main(int argc, char **argv) ++ ++/* ++ * Use assert_that() for conditions that should always be true. ++ * todo_if() and bug_if() do not influence assert_that(). ++ */ ++#define assert_that assert_that_(__LINE__) ++ ++#define ok ok_(__LINE__) ++ ++#define skip skip_(__LINE__) ++ ++#define trace trace_(__LINE__) ++ ++#define assert_that_(line) \ ++ do { \ ++ unsigned int vkd3d_line = line; \ ++ VKD3D_TEST_ASSERT_THAT ++ ++#define VKD3D_TEST_ASSERT_THAT(...) \ ++ vkd3d_test_assert_that(vkd3d_line, __VA_ARGS__); } while (0) ++ ++#define ok_(line) \ ++ do { \ ++ unsigned int vkd3d_line = line; \ ++ VKD3D_TEST_OK ++ ++#define VKD3D_TEST_OK(...) \ ++ vkd3d_test_ok(vkd3d_line, __VA_ARGS__); } while (0) ++ ++#define todo_(line) \ ++ do { \ ++ unsigned int vkd3d_line = line; \ ++ VKD3D_TEST_TODO ++ ++#define VKD3D_TEST_TODO(...) \ ++ vkd3d_test_todo(vkd3d_line, __VA_ARGS__); } while (0) ++ ++#define skip_(line) \ ++ do { \ ++ unsigned int vkd3d_line = line; \ ++ VKD3D_TEST_SKIP ++ ++#define VKD3D_TEST_SKIP(...) \ ++ vkd3d_test_skip(vkd3d_line, __VA_ARGS__); } while (0) ++ ++#define trace_(line) \ ++ do { \ ++ unsigned int vkd3d_line = line; \ ++ VKD3D_TEST_TRACE ++ ++#define VKD3D_TEST_TRACE(...) 
\ ++ vkd3d_test_trace(vkd3d_line, __VA_ARGS__); } while (0) ++ ++#define todo_if(is_todo) \ ++ for (vkd3d_test_start_todo(is_todo); vkd3d_test_loop_todo(); vkd3d_test_end_todo()) ++ ++#define bug_if(is_bug) \ ++ for (vkd3d_test_start_bug(is_bug); vkd3d_test_loop_bug(); vkd3d_test_end_bug()) ++ ++#define todo todo_if(true) ++ ++struct vkd3d_test_state ++{ ++ LONG success_count; ++ LONG failure_count; ++ LONG skip_count; ++ LONG todo_count; ++ LONG todo_success_count; ++ LONG bug_count; ++ ++ unsigned int debug_level; ++ ++ unsigned int todo_level; ++ bool todo_do_loop; ++ ++ unsigned int bug_level; ++ bool bug_do_loop; ++ bool bug_enabled; ++ ++ const char *test_name_filter; ++ char context[8][128]; ++ unsigned int context_count; ++}; ++extern struct vkd3d_test_state vkd3d_test_state; ++ ++static bool ++vkd3d_test_platform_is_windows(void) ++{ ++ return !strcmp(vkd3d_test_platform, "windows"); ++} ++ ++static inline bool ++broken(bool condition) ++{ ++ return condition && vkd3d_test_platform_is_windows(); ++} ++ ++static void vkd3d_test_printf(unsigned int line, const char *msg) ++{ ++ unsigned int i; ++ ++ printf("%s:%u: ", vkd3d_test_name, line); ++ for (i = 0; i < vkd3d_test_state.context_count; ++i) ++ printf("%s: ", vkd3d_test_state.context[i]); ++ printf("%s", msg); ++} ++ ++static void ++vkd3d_test_check_assert_that(unsigned int line, bool result, const char *fmt, va_list args) ++{ ++ if (result) ++ { ++ InterlockedIncrement(&vkd3d_test_state.success_count); ++ if (vkd3d_test_state.debug_level > 1) ++ vkd3d_test_printf(line, "Test succeeded.\n"); ++ } ++ else ++ { ++ InterlockedIncrement(&vkd3d_test_state.failure_count); ++ vkd3d_test_printf(line, "Test failed: "); ++ vprintf(fmt, args); ++ } ++} ++ ++static void VKD3D_PRINTF_FUNC(3, 4) VKD3D_UNUSED ++vkd3d_test_assert_that(unsigned int line, bool result, const char *fmt, ...) 
++{ ++ va_list args; ++ ++ va_start(args, fmt); ++ vkd3d_test_check_assert_that(line, result, fmt, args); ++ va_end(args); ++} ++ ++static void ++vkd3d_test_check_ok(unsigned int line, bool result, const char *fmt, va_list args) ++{ ++ bool is_todo = vkd3d_test_state.todo_level && !vkd3d_test_platform_is_windows(); ++ bool is_bug = vkd3d_test_state.bug_level && !vkd3d_test_platform_is_windows(); ++ ++ if (is_bug && vkd3d_test_state.bug_enabled) ++ { ++ InterlockedIncrement(&vkd3d_test_state.bug_count); ++ if (is_todo) ++ result = !result; ++ if (result) ++ vkd3d_test_printf(line, "Fixed bug: "); ++ else ++ vkd3d_test_printf(line, "Bug: "); ++ vprintf(fmt, args); ++ } ++ else if (is_todo) ++ { ++ if (result) ++ { ++ InterlockedIncrement(&vkd3d_test_state.todo_success_count); ++ vkd3d_test_printf(line, "Todo succeeded: "); ++ } ++ else ++ { ++ InterlockedIncrement(&vkd3d_test_state.todo_count); ++ vkd3d_test_printf(line, "Todo: "); ++ } ++ vprintf(fmt, args); ++ } ++ else ++ { ++ vkd3d_test_check_assert_that(line, result, fmt, args); ++ } ++} ++ ++static void VKD3D_PRINTF_FUNC(3, 4) VKD3D_UNUSED ++vkd3d_test_ok(unsigned int line, bool result, const char *fmt, ...) ++{ ++ va_list args; ++ ++ va_start(args, fmt); ++ vkd3d_test_check_ok(line, result, fmt, args); ++ va_end(args); ++} ++ ++static void VKD3D_PRINTF_FUNC(2, 3) VKD3D_UNUSED ++vkd3d_test_skip(unsigned int line, const char *fmt, ...) ++{ ++ va_list args; ++ va_start(args, fmt); ++ vkd3d_test_printf(line, "Test skipped: "); ++ vprintf(fmt, args); ++ va_end(args); ++ InterlockedIncrement(&vkd3d_test_state.skip_count); ++} ++ ++static void VKD3D_PRINTF_FUNC(2, 3) VKD3D_UNUSED ++vkd3d_test_trace(unsigned int line, const char *fmt, ...) ++{ ++ va_list args; ++ va_start(args, fmt); ++ vkd3d_test_printf(line, ""); ++ vprintf(fmt, args); ++ va_end(args); ++} ++ ++static void VKD3D_PRINTF_FUNC(1, 2) VKD3D_UNUSED ++vkd3d_test_debug(const char *fmt, ...) 
++{ ++ char buffer[512]; ++ va_list args; ++ int size; ++ ++ size = snprintf(buffer, sizeof(buffer), "%s: ", vkd3d_test_name); ++ if (0 < size && size < sizeof(buffer)) ++ { ++ va_start(args, fmt); ++ vsnprintf(buffer + size, sizeof(buffer) - size, fmt, args); ++ va_end(args); ++ } ++ buffer[sizeof(buffer) - 1] = '\0'; ++ ++#ifdef _WIN32 ++ OutputDebugStringA(buffer); ++#endif ++ ++ if (vkd3d_test_state.debug_level > 0) ++ printf("%s\n", buffer); ++} ++ ++#ifndef VKD3D_TEST_NO_DEFS ++const char *vkd3d_test_platform = "other"; ++struct vkd3d_test_state vkd3d_test_state; ++ ++static void vkd3d_test_main(int argc, char **argv); ++ ++int main(int argc, char **argv) ++{ ++ const char *test_filter = getenv("VKD3D_TEST_FILTER"); ++ const char *debug_level = getenv("VKD3D_TEST_DEBUG"); ++ char *test_platform = getenv("VKD3D_TEST_PLATFORM"); ++ const char *bug = getenv("VKD3D_TEST_BUG"); ++ ++ memset(&vkd3d_test_state, 0, sizeof(vkd3d_test_state)); ++ vkd3d_test_state.debug_level = debug_level ? atoi(debug_level) : 0; ++ vkd3d_test_state.bug_enabled = bug ? 
atoi(bug) : true; ++ vkd3d_test_state.test_name_filter = test_filter; ++ ++ if (test_platform) ++ { ++ test_platform = strdup(test_platform); ++ vkd3d_test_platform = test_platform; ++ } ++ ++ if (vkd3d_test_state.debug_level > 1) ++ printf("Test platform: '%s'.\n", vkd3d_test_platform); ++ ++ vkd3d_test_main(argc, argv); ++ ++ printf("%s: %lu tests executed (%lu failures, %lu skipped, %lu todo, %lu bugs).\n", ++ vkd3d_test_name, ++ (unsigned long)(vkd3d_test_state.success_count ++ + vkd3d_test_state.failure_count + vkd3d_test_state.todo_count ++ + vkd3d_test_state.todo_success_count), ++ (unsigned long)(vkd3d_test_state.failure_count ++ + vkd3d_test_state.todo_success_count), ++ (unsigned long)vkd3d_test_state.skip_count, ++ (unsigned long)vkd3d_test_state.todo_count, ++ (unsigned long)vkd3d_test_state.bug_count); ++ ++ if (test_platform) ++ free(test_platform); ++ ++ return vkd3d_test_state.failure_count || vkd3d_test_state.todo_success_count; ++} ++ ++#ifdef _WIN32 ++static char *vkd3d_test_strdupWtoA(WCHAR *str) ++{ ++ char *out; ++ int len; ++ ++ if (!(len = WideCharToMultiByte(CP_ACP, 0, str, -1, NULL, 0, NULL, NULL))) ++ return NULL; ++ if (!(out = malloc(len))) ++ return NULL; ++ WideCharToMultiByte(CP_ACP, 0, str, -1, out, len, NULL, NULL); ++ ++ return out; ++} ++ ++static bool running_under_wine(void) ++{ ++ HMODULE module = GetModuleHandleA("ntdll.dll"); ++ return module && GetProcAddress(module, "wine_server_call"); ++} ++ ++int wmain(int argc, WCHAR **wargv) ++{ ++ char **argv; ++ int i, ret; ++ ++ argv = malloc(argc * sizeof(*argv)); ++ assert(argv); ++ for (i = 0; i < argc; ++i) ++ { ++ if (!(argv[i] = vkd3d_test_strdupWtoA(wargv[i]))) ++ break; ++ } ++ assert(i == argc); ++ ++ vkd3d_test_platform = running_under_wine() ? 
"wine" : "windows"; ++ ++ ret = main(argc, argv); ++ ++ for (i = 0; i < argc; ++i) ++ free(argv[i]); ++ free(argv); ++ ++ return ret; ++} ++#endif /* _WIN32 */ ++#endif /* VKD3D_TEST_NO_DEFS */ ++ ++typedef void (*vkd3d_test_pfn)(void); ++ ++static inline void vkd3d_run_test(const char *name, vkd3d_test_pfn test_pfn) ++{ ++ if (vkd3d_test_state.test_name_filter && !strstr(name, vkd3d_test_state.test_name_filter)) ++ return; ++ ++ vkd3d_test_debug("%s", name); ++ test_pfn(); ++} ++ ++static inline void vkd3d_test_start_todo(bool is_todo) ++{ ++ vkd3d_test_state.todo_level = (vkd3d_test_state.todo_level << 1) | is_todo; ++ vkd3d_test_state.todo_do_loop = true; ++} ++ ++static inline int vkd3d_test_loop_todo(void) ++{ ++ bool do_loop = vkd3d_test_state.todo_do_loop; ++ vkd3d_test_state.todo_do_loop = false; ++ return do_loop; ++} ++ ++static inline void vkd3d_test_end_todo(void) ++{ ++ vkd3d_test_state.todo_level >>= 1; ++} ++ ++static inline void vkd3d_test_start_bug(bool is_bug) ++{ ++ vkd3d_test_state.bug_level = (vkd3d_test_state.bug_level << 1) | is_bug; ++ vkd3d_test_state.bug_do_loop = true; ++} ++ ++static inline int vkd3d_test_loop_bug(void) ++{ ++ bool do_loop = vkd3d_test_state.bug_do_loop; ++ vkd3d_test_state.bug_do_loop = false; ++ return do_loop; ++} ++ ++static inline void vkd3d_test_end_bug(void) ++{ ++ vkd3d_test_state.bug_level >>= 1; ++} ++ ++static inline void vkd3d_test_push_context(const char *fmt, ...) 
++{ ++ va_list args; ++ ++ if (vkd3d_test_state.context_count < ARRAY_SIZE(vkd3d_test_state.context)) ++ { ++ va_start(args, fmt); ++ vsnprintf(vkd3d_test_state.context[vkd3d_test_state.context_count], ++ sizeof(vkd3d_test_state.context), fmt, args); ++ va_end(args); ++ vkd3d_test_state.context[vkd3d_test_state.context_count][sizeof(vkd3d_test_state.context[0]) - 1] = '\0'; ++ } ++ ++vkd3d_test_state.context_count; ++} ++ ++static inline void vkd3d_test_pop_context(void) ++{ ++ if (vkd3d_test_state.context_count) ++ --vkd3d_test_state.context_count; ++} ++ ++#define run_test(test_pfn) \ ++ vkd3d_run_test(#test_pfn, test_pfn) ++ ++#endif /* __VKD3D_TEST_H */ +diff --git a/libs/vkd3d/include/vkd3d_d3d9types.h b/libs/vkd3d/include/vkd3d_d3d9types.h +new file mode 100644 +index 00000000000..75d0461409d +--- /dev/null ++++ b/libs/vkd3d/include/vkd3d_d3d9types.h +@@ -0,0 +1,237 @@ ++/* ++ * Copyright 2002-2003 Jason Edmeades ++ * Copyright 2002-2003 Raphael Junqueira ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __VKD3D_D3D9TYPES_H ++#define __VKD3D_D3D9TYPES_H ++#ifndef _d3d9TYPES_H_ ++ ++#ifndef MAKEFOURCC ++#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ ++ ((DWORD)(BYTE)(ch0) | ((DWORD)(BYTE)(ch1) << 8) | \ ++ ((DWORD)(BYTE)(ch2) << 16) | ((DWORD)(BYTE)(ch3) << 24 )) ++#endif ++ ++#define D3DSI_INSTLENGTH_SHIFT 24 ++ ++#define D3DSP_DCL_USAGE_SHIFT 0 ++#define D3DSP_DCL_USAGEINDEX_SHIFT 16 ++#define D3DSP_DSTMOD_SHIFT 20 ++ ++#define D3DSP_SRCMOD_SHIFT 24 ++ ++#define D3DSP_REGTYPE_SHIFT 28 ++#define D3DSP_REGTYPE_SHIFT2 8 ++#define D3DSP_REGTYPE_MASK (0x7 << D3DSP_REGTYPE_SHIFT) ++#define D3DSP_REGTYPE_MASK2 0x00001800 ++ ++#define D3DSP_WRITEMASK_0 0x00010000 ++#define D3DSP_WRITEMASK_1 0x00020000 ++#define D3DSP_WRITEMASK_2 0x00040000 ++#define D3DSP_WRITEMASK_3 0x00080000 ++#define D3DSP_WRITEMASK_ALL 0x000f0000 ++ ++#define D3DPS_VERSION(major, minor) (0xffff0000 | ((major) << 8) | (minor)) ++#define D3DVS_VERSION(major, minor) (0xfffe0000 | ((major) << 8) | (minor)) ++ ++typedef enum _D3DDECLUSAGE ++{ ++ D3DDECLUSAGE_POSITION = 0x0, ++ D3DDECLUSAGE_BLENDWEIGHT = 0x1, ++ D3DDECLUSAGE_BLENDINDICES = 0x2, ++ D3DDECLUSAGE_NORMAL = 0x3, ++ D3DDECLUSAGE_PSIZE = 0x4, ++ D3DDECLUSAGE_TEXCOORD = 0x5, ++ D3DDECLUSAGE_TANGENT = 0x6, ++ D3DDECLUSAGE_BINORMAL = 0x7, ++ D3DDECLUSAGE_TESSFACTOR = 0x8, ++ D3DDECLUSAGE_POSITIONT = 0x9, ++ D3DDECLUSAGE_COLOR = 0xa, ++ D3DDECLUSAGE_FOG = 0xb, ++ D3DDECLUSAGE_DEPTH = 0xc, ++ D3DDECLUSAGE_SAMPLE = 0xd, ++} D3DDECLUSAGE; ++ ++typedef enum _D3DSHADER_INSTRUCTION_OPCODE_TYPE ++{ ++ D3DSIO_NOP = 0x00, ++ D3DSIO_MOV = 0x01, ++ D3DSIO_ADD = 0x02, ++ D3DSIO_SUB = 0x03, ++ D3DSIO_MAD = 0x04, ++ D3DSIO_MUL = 0x05, ++ D3DSIO_RCP = 0x06, ++ D3DSIO_RSQ = 0x07, ++ D3DSIO_DP3 = 0x08, ++ D3DSIO_DP4 = 0x09, ++ 
D3DSIO_MIN = 0x0a, ++ D3DSIO_MAX = 0x0b, ++ D3DSIO_SLT = 0x0c, ++ D3DSIO_SGE = 0x0d, ++ D3DSIO_EXP = 0x0e, ++ D3DSIO_LOG = 0x0f, ++ D3DSIO_LIT = 0x10, ++ D3DSIO_DST = 0x11, ++ D3DSIO_LRP = 0x12, ++ D3DSIO_FRC = 0x13, ++ D3DSIO_M4x4 = 0x14, ++ D3DSIO_M4x3 = 0x15, ++ D3DSIO_M3x4 = 0x16, ++ D3DSIO_M3x3 = 0x17, ++ D3DSIO_M3x2 = 0x18, ++ D3DSIO_CALL = 0x19, ++ D3DSIO_CALLNZ = 0x1a, ++ D3DSIO_LOOP = 0x1b, ++ D3DSIO_RET = 0x1c, ++ D3DSIO_ENDLOOP = 0x1d, ++ D3DSIO_LABEL = 0x1e, ++ D3DSIO_DCL = 0x1f, ++ D3DSIO_POW = 0x20, ++ D3DSIO_CRS = 0x21, ++ D3DSIO_SGN = 0x22, ++ D3DSIO_ABS = 0x23, ++ D3DSIO_NRM = 0x24, ++ D3DSIO_SINCOS = 0x25, ++ D3DSIO_REP = 0x26, ++ D3DSIO_ENDREP = 0x27, ++ D3DSIO_IF = 0x28, ++ D3DSIO_IFC = 0x29, ++ D3DSIO_ELSE = 0x2a, ++ D3DSIO_ENDIF = 0x2b, ++ D3DSIO_BREAK = 0x2c, ++ D3DSIO_BREAKC = 0x2d, ++ D3DSIO_MOVA = 0x2e, ++ D3DSIO_DEFB = 0x2f, ++ D3DSIO_DEFI = 0x30, ++ ++ D3DSIO_TEXCOORD = 0x40, ++ D3DSIO_TEXKILL = 0x41, ++ D3DSIO_TEX = 0x42, ++ D3DSIO_TEXBEM = 0x43, ++ D3DSIO_TEXBEML = 0x44, ++ D3DSIO_TEXREG2AR = 0x45, ++ D3DSIO_TEXREG2GB = 0x46, ++ D3DSIO_TEXM3x2PAD = 0x47, ++ D3DSIO_TEXM3x2TEX = 0x48, ++ D3DSIO_TEXM3x3PAD = 0x49, ++ D3DSIO_TEXM3x3TEX = 0x4a, ++ D3DSIO_TEXM3x3DIFF = 0x4b, ++ D3DSIO_TEXM3x3SPEC = 0x4c, ++ D3DSIO_TEXM3x3VSPEC = 0x4d, ++ D3DSIO_EXPP = 0x4e, ++ D3DSIO_LOGP = 0x4f, ++ D3DSIO_CND = 0x50, ++ D3DSIO_DEF = 0x51, ++ D3DSIO_TEXREG2RGB = 0x52, ++ D3DSIO_TEXDP3TEX = 0x53, ++ D3DSIO_TEXM3x2DEPTH = 0x54, ++ D3DSIO_TEXDP3 = 0x55, ++ D3DSIO_TEXM3x3 = 0x56, ++ D3DSIO_TEXDEPTH = 0x57, ++ D3DSIO_CMP = 0x58, ++ D3DSIO_BEM = 0x59, ++ D3DSIO_DP2ADD = 0x5a, ++ D3DSIO_DSX = 0x5b, ++ D3DSIO_DSY = 0x5c, ++ D3DSIO_TEXLDD = 0x5d, ++ D3DSIO_SETP = 0x5e, ++ D3DSIO_TEXLDL = 0x5f, ++ D3DSIO_BREAKP = 0x60, ++ ++ D3DSIO_PHASE = 0xfffd, ++ D3DSIO_COMMENT = 0xfffe, ++ D3DSIO_END = 0xffff, ++ ++ D3DSIO_FORCE_DWORD = 0x7fffffff, ++} D3DSHADER_INSTRUCTION_OPCODE_TYPE; ++ ++typedef enum _D3DSHADER_PARAM_DSTMOD_TYPE ++{ ++ D3DSPDM_NONE = 0 << D3DSP_DSTMOD_SHIFT, 
++ D3DSPDM_SATURATE = 1 << D3DSP_DSTMOD_SHIFT, ++ D3DSPDM_PARTIALPRECISION = 2 << D3DSP_DSTMOD_SHIFT, ++ D3DSPDM_MSAMPCENTROID = 4 << D3DSP_DSTMOD_SHIFT, ++ ++ D3DSPDM_FORCE_DWORD = 0x7fffffff, ++} D3DSHADER_PARAM_DSTMOD_TYPE; ++ ++typedef enum _D3DSHADER_PARAM_REGISTER_TYPE ++{ ++ D3DSPR_TEMP = 0x00, ++ D3DSPR_INPUT = 0x01, ++ D3DSPR_CONST = 0x02, ++ D3DSPR_ADDR = 0x03, ++ D3DSPR_TEXTURE = 0x03, ++ D3DSPR_RASTOUT = 0x04, ++ D3DSPR_ATTROUT = 0x05, ++ D3DSPR_TEXCRDOUT = 0x06, ++ D3DSPR_OUTPUT = 0x06, ++ D3DSPR_CONSTINT = 0x07, ++ D3DSPR_COLOROUT = 0x08, ++ D3DSPR_DEPTHOUT = 0x09, ++ D3DSPR_SAMPLER = 0x0a, ++ D3DSPR_CONST2 = 0x0b, ++ D3DSPR_CONST3 = 0x0c, ++ D3DSPR_CONST4 = 0x0d, ++ D3DSPR_CONSTBOOL = 0x0e, ++ D3DSPR_LOOP = 0x0f, ++ D3DSPR_TEMPFLOAT16 = 0x10, ++ D3DSPR_MISCTYPE = 0x11, ++ D3DSPR_LABEL = 0x12, ++ D3DSPR_PREDICATE = 0x13, ++ ++ D3DSPR_FORCE_DWORD = 0x7fffffff, ++} D3DSHADER_PARAM_REGISTER_TYPE; ++ ++typedef enum _D3DSHADER_PARAM_SRCMOD_TYPE ++{ ++ D3DSPSM_NONE = 0x0 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_NEG = 0x1 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_BIAS = 0x2 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_BIASNEG = 0x3 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_SIGN = 0x4 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_SIGNNEG = 0x5 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_COMP = 0x6 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_X2 = 0x7 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_X2NEG = 0x8 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_DZ = 0x9 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_DW = 0xa << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_ABS = 0xb << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_ABSNEG = 0xc << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_NOT = 0xd << D3DSP_SRCMOD_SHIFT, ++ ++ D3DSPSM_FORCE_DWORD = 0x7fffffff, ++} D3DSHADER_PARAM_SRCMOD_TYPE; ++ ++typedef enum _D3DSHADER_MISCTYPE_OFFSETS ++{ ++ D3DSMO_POSITION = 0x0, ++ D3DSMO_FACE = 0x1, ++} D3DSHADER_MISCTYPE_OFFSETS; ++ ++typedef enum _D3DVS_RASTOUT_OFFSETS ++{ ++ D3DSRO_POSITION = 0x0, ++ D3DSRO_FOG = 0x1, ++ D3DSRO_POINT_SIZE = 0x2, ++ ++ D3DSRO_FORCE_DWORD = 0x7fffffff, ++} D3DVS_RASTOUT_OFFSETS; ++ 
++#endif /* _d3d9TYPES_H_ */ ++#endif /* __VKD3D_D3D9TYPES_H */ +diff --git a/libs/vkd3d/include/vkd3d_d3dcompiler.h b/libs/vkd3d/include/vkd3d_d3dcompiler.h +new file mode 100644 +index 00000000000..c934835dc0a +--- /dev/null ++++ b/libs/vkd3d/include/vkd3d_d3dcompiler.h +@@ -0,0 +1,74 @@ ++/* ++ * Copyright 2010 Matteo Bruni for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __VKD3D_D3DCOMPILER_H ++#define __VKD3D_D3DCOMPILER_H ++#ifndef __D3DCOMPILER_H__ ++ ++#define D3DCOMPILE_DEBUG 0x00000001 ++#define D3DCOMPILE_SKIP_VALIDATION 0x00000002 ++#define D3DCOMPILE_SKIP_OPTIMIZATION 0x00000004 ++#define D3DCOMPILE_PACK_MATRIX_ROW_MAJOR 0x00000008 ++#define D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR 0x00000010 ++#define D3DCOMPILE_PARTIAL_PRECISION 0x00000020 ++#define D3DCOMPILE_FORCE_VS_SOFTWARE_NO_OPT 0x00000040 ++#define D3DCOMPILE_FORCE_PS_SOFTWARE_NO_OPT 0x00000080 ++#define D3DCOMPILE_NO_PRESHADER 0x00000100 ++#define D3DCOMPILE_AVOID_FLOW_CONTROL 0x00000200 ++#define D3DCOMPILE_PREFER_FLOW_CONTROL 0x00000400 ++#define D3DCOMPILE_ENABLE_STRICTNESS 0x00000800 ++#define D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY 0x00001000 ++#define D3DCOMPILE_IEEE_STRICTNESS 0x00002000 ++#define D3DCOMPILE_OPTIMIZATION_LEVEL0 0x00004000 ++#define 
D3DCOMPILE_OPTIMIZATION_LEVEL1 0x00000000 ++#define D3DCOMPILE_OPTIMIZATION_LEVEL2 0x0000c000 ++#define D3DCOMPILE_OPTIMIZATION_LEVEL3 0x00008000 ++#define D3DCOMPILE_RESERVED16 0x00010000 ++#define D3DCOMPILE_RESERVED17 0x00020000 ++#define D3DCOMPILE_WARNINGS_ARE_ERRORS 0x00040000 ++#define D3DCOMPILE_RESOURCES_MAY_ALIAS 0x00080000 ++#define D3DCOMPILE_ENABLE_UNBOUNDED_DESCRIPTOR_TABLES 0x00100000 ++#define D3DCOMPILE_ALL_RESOURCES_BOUND 0x00200000 ++#define D3DCOMPILE_DEBUG_NAME_FOR_SOURCE 0x00400000 ++#define D3DCOMPILE_DEBUG_NAME_FOR_BINARY 0x00800000 ++ ++#define D3DCOMPILE_EFFECT_CHILD_EFFECT 0x00000001 ++#define D3DCOMPILE_EFFECT_ALLOW_SLOW_OPS 0x00000002 ++ ++#define D3DCOMPILE_FLAGS2_FORCE_ROOT_SIGNATURE_LATEST 0x00000000 ++#define D3DCOMPILE_FLAGS2_FORCE_ROOT_SIGNATURE_1_0 0x00000010 ++#define D3DCOMPILE_FLAGS2_FORCE_ROOT_SIGNATURE_1_1 0x00000020 ++ ++#define D3DCOMPILE_SECDATA_MERGE_UAV_SLOTS 0x00000001 ++#define D3DCOMPILE_SECDATA_PRESERVE_TEMPLATE_SLOTS 0x00000002 ++#define D3DCOMPILE_SECDATA_REQUIRE_TEMPLATE_MATCH 0x00000004 ++ ++HRESULT WINAPI D3DCompile(const void *data, SIZE_T data_size, const char *filename, ++ const D3D_SHADER_MACRO *macros, ID3DInclude *include, const char *entrypoint, ++ const char *profile, UINT flags, UINT effect_flags, ID3DBlob **shader, ID3DBlob **error_messages); ++HRESULT WINAPI D3DCompile2(const void *data, SIZE_T data_size, const char *filename, ++ const D3D_SHADER_MACRO *macros, ID3DInclude *include, const char *entrypoint, ++ const char *profile, UINT flags, UINT effect_flags, UINT secondary_flags, ++ const void *secondary_data, SIZE_T secondary_data_size, ID3DBlob **shader, ++ ID3DBlob **error_messages); ++HRESULT WINAPI D3DCreateBlob(SIZE_T size, ID3DBlob **blob); ++HRESULT WINAPI D3DPreprocess(const void *data, SIZE_T size, const char *filename, const D3D_SHADER_MACRO *macros, ++ ID3DInclude *include, ID3DBlob **shader, ID3DBlob **error_messages); ++ ++#endif /* __D3DCOMPILER_H__ */ ++#endif /* 
__VKD3D_D3DCOMPILER_H */ +diff --git a/libs/vkd3d/include/vkd3d_utils.h b/libs/vkd3d/include/vkd3d_utils.h +new file mode 100644 +index 00000000000..e8462563576 +--- /dev/null ++++ b/libs/vkd3d/include/vkd3d_utils.h +@@ -0,0 +1,108 @@ ++/* ++ * Copyright 2016 Józef Kucia for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __VKD3D_UTILS_H ++#define __VKD3D_UTILS_H ++ ++#include ++ ++#ifndef VKD3D_UTILS_API_VERSION ++#define VKD3D_UTILS_API_VERSION VKD3D_API_VERSION_1_0 ++#endif ++ ++#ifdef __cplusplus ++extern "C" { ++#endif /* __cplusplus */ ++ ++/** ++ * \file vkd3d_utils.h ++ * ++ * This file contains definitions for the vkd3d-utils library. ++ * ++ * The vkd3d-utils library is a collections of routines to ease the ++ * porting of a Direct3D 12 application to vkd3d. 
++ * ++ * \since 1.0 ++ */ ++ ++#define VKD3D_WAIT_OBJECT_0 (0) ++#define VKD3D_WAIT_TIMEOUT (1) ++#define VKD3D_WAIT_FAILED (~0u) ++#define VKD3D_INFINITE (~0u) ++ ++#ifdef LIBVKD3D_UTILS_SOURCE ++# define VKD3D_UTILS_API VKD3D_EXPORT ++#else ++# define VKD3D_UTILS_API VKD3D_IMPORT ++#endif ++ ++/* 1.0 */ ++VKD3D_UTILS_API HANDLE vkd3d_create_event(void); ++VKD3D_UTILS_API HRESULT vkd3d_signal_event(HANDLE event); ++VKD3D_UTILS_API unsigned int vkd3d_wait_event(HANDLE event, unsigned int milliseconds); ++VKD3D_UTILS_API void vkd3d_destroy_event(HANDLE event); ++ ++#define D3D12CreateDevice(a, b, c, d) D3D12CreateDeviceVKD3D(a, b, c, d, VKD3D_UTILS_API_VERSION) ++VKD3D_UTILS_API HRESULT WINAPI D3D12CreateRootSignatureDeserializer( ++ const void *data, SIZE_T data_size, REFIID iid, void **deserializer); ++VKD3D_UTILS_API HRESULT WINAPI D3D12GetDebugInterface(REFIID iid, void **debug); ++VKD3D_UTILS_API HRESULT WINAPI D3D12SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC *desc, ++ D3D_ROOT_SIGNATURE_VERSION version, ID3DBlob **blob, ID3DBlob **error_blob); ++ ++/* 1.2 */ ++VKD3D_UTILS_API HRESULT WINAPI D3D12CreateDeviceVKD3D(IUnknown *adapter, D3D_FEATURE_LEVEL feature_level, ++ REFIID iid, void **device, enum vkd3d_api_version api_version); ++VKD3D_UTILS_API HRESULT WINAPI D3D12CreateVersionedRootSignatureDeserializer(const void *data, ++ SIZE_T data_size, REFIID iid, void **deserializer); ++VKD3D_UTILS_API HRESULT WINAPI D3D12SerializeVersionedRootSignature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc, ++ ID3DBlob **blob, ID3DBlob **error_blob); ++ ++/* 1.3 */ ++VKD3D_UTILS_API HRESULT WINAPI D3DCompile(const void *data, SIZE_T data_size, const char *filename, ++ const D3D_SHADER_MACRO *defines, ID3DInclude *include, const char *entrypoint, ++ const char *target, UINT flags, UINT effect_flags, ID3DBlob **shader, ID3DBlob **error_messages); ++VKD3D_UTILS_API HRESULT WINAPI D3DCompile2(const void *data, SIZE_T data_size, const char *filename, ++ const 
D3D_SHADER_MACRO *defines, ID3DInclude *include, const char *entrypoint, ++ const char *target, UINT flags, UINT effect_flags, UINT secondary_flags, ++ const void *secondary_data, SIZE_T secondary_data_size, ID3DBlob **shader, ++ ID3DBlob **error_messages); ++VKD3D_UTILS_API HRESULT WINAPI D3DCreateBlob(SIZE_T data_size, ID3DBlob **blob); ++VKD3D_UTILS_API HRESULT WINAPI D3DPreprocess(const void *data, SIZE_T size, const char *filename, ++ const D3D_SHADER_MACRO *defines, ID3DInclude *include, ++ ID3DBlob **shader, ID3DBlob **error_messages); ++ ++/** ++ * Set a callback to be called when vkd3d-utils outputs debug logging. ++ * ++ * If NULL, or if this function has not been called, libvkd3d-utils will print ++ * all enabled log output to stderr. ++ * ++ * Calling this function will also set the log callback for libvkd3d and ++ * libvkd3d-shader. ++ * ++ * \param callback Callback function to set. ++ * ++ * \since 1.4 ++ */ ++VKD3D_UTILS_API void vkd3d_utils_set_log_callback(PFN_vkd3d_log callback); ++ ++#ifdef __cplusplus ++} ++#endif /* __cplusplus */ ++ ++#endif /* __VKD3D_UTILS_H */ +diff --git a/libs/vkd3d/include/vkd3d_windows.h b/libs/vkd3d/include/vkd3d_windows.h +new file mode 100644 +index 00000000000..7b0e972d828 +--- /dev/null ++++ b/libs/vkd3d/include/vkd3d_windows.h +@@ -0,0 +1,289 @@ ++/* ++ * Copyright 2016 Józef Kucia for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __VKD3D_WINDOWS_H ++#define __VKD3D_WINDOWS_H ++#ifndef _INC_WINDOWS ++ ++/* Nameless unions */ ++#ifndef __C89_NAMELESS ++# ifdef NONAMELESSUNION ++# define __C89_NAMELESS ++# define __C89_NAMELESSUNIONNAME u ++# else ++# define __C89_NAMELESS ++# define __C89_NAMELESSUNIONNAME ++# endif /* NONAMELESSUNION */ ++#endif /* __C89_NAMELESS */ ++ ++#if !defined(_WIN32) || defined(__WIDL__) ++ ++# if !defined(__WIDL__) ++# if !defined(VKD3D_WIN32_WCHAR) ++# include ++# endif ++# include ++# endif ++ ++# ifdef __GNUC__ ++# define DECLSPEC_ALIGN(x) __attribute__((aligned(x))) ++# endif ++ ++/* HRESULT */ ++typedef int HRESULT; ++# define SUCCEEDED(hr) ((HRESULT)(hr) >= 0) ++# define FAILED(hr) ((HRESULT)(hr) < 0) ++ ++# define _HRESULT_TYPEDEF_(x) ((HRESULT)x) ++ ++# define S_OK _HRESULT_TYPEDEF_(0) ++# define S_FALSE _HRESULT_TYPEDEF_(1) ++ ++# define E_NOTIMPL _HRESULT_TYPEDEF_(0x80004001) ++# define E_NOINTERFACE _HRESULT_TYPEDEF_(0x80004002) ++# define E_POINTER _HRESULT_TYPEDEF_(0x80004003) ++# define E_ABORT _HRESULT_TYPEDEF_(0x80004004) ++# define E_FAIL _HRESULT_TYPEDEF_(0x80004005) ++# define E_OUTOFMEMORY _HRESULT_TYPEDEF_(0x8007000E) ++# define E_INVALIDARG _HRESULT_TYPEDEF_(0x80070057) ++ ++# define DXGI_ERROR_NOT_FOUND _HRESULT_TYPEDEF_(0x887a0002) ++# define DXGI_ERROR_MORE_DATA _HRESULT_TYPEDEF_(0x887a0003) ++# define DXGI_ERROR_UNSUPPORTED _HRESULT_TYPEDEF_(0x887a0004) ++ ++# define D3DERR_INVALIDCALL _HRESULT_TYPEDEF_(0x8876086c) ++ ++/* Basic types */ ++typedef unsigned char BYTE; ++typedef unsigned int DWORD; ++typedef int INT; ++typedef unsigned int UINT; ++typedef int LONG; ++typedef unsigned int ULONG; ++typedef float FLOAT; ++typedef LONG BOOL; ++ ++/* Assuming LP64 model */ ++typedef char INT8; ++typedef 
unsigned char UINT8; ++typedef short INT16; ++typedef unsigned short UINT16; ++typedef int INT32; ++typedef unsigned int UINT32; ++# if defined(__WIDL__) ++typedef __int64 INT64; ++typedef unsigned __int64 UINT64; ++# else ++typedef int64_t DECLSPEC_ALIGN(8) INT64; ++typedef uint64_t DECLSPEC_ALIGN(8) UINT64; ++# endif ++typedef INT64 LONG64; ++typedef long LONG_PTR; ++typedef unsigned long ULONG_PTR; ++ ++typedef ULONG_PTR SIZE_T; ++ ++# ifdef VKD3D_WIN32_WCHAR ++typedef unsigned short WCHAR; ++# else ++typedef wchar_t WCHAR; ++# endif /* VKD3D_WIN32_WCHAR */ ++typedef void *HANDLE; ++ ++/* GUID */ ++# ifdef __WIDL__ ++typedef struct ++{ ++ unsigned long Data1; ++ unsigned short Data2; ++ unsigned short Data3; ++ unsigned char Data4[8]; ++} GUID; ++# else ++typedef struct _GUID ++{ ++ unsigned int Data1; ++ unsigned short Data2; ++ unsigned short Data3; ++ unsigned char Data4[8]; ++} GUID; ++# endif ++ ++typedef GUID IID; ++typedef GUID CLSID; ++typedef GUID UUID; ++ ++# ifdef INITGUID ++# ifndef __cplusplus ++# define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ ++ const GUID name DECLSPEC_HIDDEN; \ ++ const GUID name = \ ++ { l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 }} ++# else ++# define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ ++ EXTERN_C const GUID name DECLSPEC_HIDDEN; \ ++ EXTERN_C const GUID name = \ ++ { l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 }} ++# endif ++# else ++# define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ ++ EXTERN_C const GUID name DECLSPEC_HIDDEN; ++# endif /* INITGUID */ ++ ++/* __uuidof emulation */ ++#if defined(__cplusplus) && !defined(_MSC_VER) ++ ++extern "C++" ++{ ++ template const GUID &__vkd3d_uuidof(); ++} ++ ++# define __CRT_UUID_DECL(type, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ ++ extern "C++" \ ++ { \ ++ template<> inline const GUID &__vkd3d_uuidof() \ ++ { \ ++ static const IID __uuid_inst = {l, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}; \ ++ return 
__uuid_inst; \ ++ } \ ++ template<> inline const GUID &__vkd3d_uuidof() \ ++ { \ ++ return __vkd3d_uuidof(); \ ++ } \ ++ } ++ ++# define __uuidof(type) __vkd3d_uuidof() ++#else ++# define __CRT_UUID_DECL(type, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) ++#endif /* defined(__cplusplus) && !defined(_MSC_VER) */ ++ ++typedef struct SECURITY_ATTRIBUTES SECURITY_ATTRIBUTES; ++#endif /* !defined(_WIN32) || defined(__WIDL__) */ ++ ++ ++#ifndef _WIN32 ++# include ++# include ++# include ++ ++# define COM_NO_WINDOWS_H ++ ++# define FORCEINLINE inline ++ ++# define CONTAINING_RECORD(address, type, field) \ ++ ((type *)((char *)(address) - offsetof(type, field))) ++ ++# ifdef __x86_64__ ++# define __stdcall __attribute__((ms_abi)) ++# else ++# if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 2)) || defined(__APPLE__) ++# define __stdcall __attribute__((__stdcall__)) __attribute__((__force_align_arg_pointer__)) ++# else ++# define __stdcall __attribute__((__stdcall__)) ++# endif ++# endif ++ ++# define WINAPI __stdcall ++# define STDMETHODCALLTYPE __stdcall ++ ++# ifdef __GNUC__ ++# define DECLSPEC_SELECTANY __attribute__((weak)) ++# endif ++ ++/* Macros for COM interfaces */ ++# define interface struct ++# define BEGIN_INTERFACE ++# define END_INTERFACE ++# define MIDL_INTERFACE(x) struct ++ ++# ifdef __cplusplus ++# define EXTERN_C extern "C" ++# else ++# define EXTERN_C extern ++# endif ++ ++# define CONST_VTBL const ++ ++# define TRUE 1 ++# define FALSE 0 ++ ++# if defined(__cplusplus) && !defined(CINTERFACE) ++# define REFIID const IID & ++# define REFGUID const GUID & ++# define REFCLSID const CLSID & ++# else ++# define REFIID const IID * const ++# define REFGUID const GUID * const ++# define REFCLSID const CLSID * const ++# endif ++ ++#if defined(__cplusplus) && !defined(CINTERFACE) ++# define IsEqualGUID(guid1, guid2) (!memcmp(&(guid1), &(guid2), sizeof(GUID))) ++#else ++# define IsEqualGUID(guid1, guid2) (!memcmp(guid1, guid2, sizeof(GUID))) ++#endif ++ 
++#elif !defined(__WIDL__) ++ ++# include ++ ++#endif /* _WIN32 */ ++ ++ ++/* Define DECLSPEC_HIDDEN */ ++#ifndef DECLSPEC_HIDDEN ++# if defined(__MINGW32__) ++# define DECLSPEC_HIDDEN ++# elif defined(__GNUC__) ++# define DECLSPEC_HIDDEN __attribute__((visibility("hidden"))) ++# else ++# define DECLSPEC_HIDDEN ++# endif ++#endif /* DECLSPEC_HIDDEN */ ++ ++/* Define min() & max() macros */ ++#ifndef NOMINMAX ++# ifndef min ++# define min(a, b) (((a) <= (b)) ? (a) : (b)) ++# endif ++ ++# ifndef max ++# define max(a, b) (((a) >= (b)) ? (a) : (b)) ++# endif ++#endif /* NOMINMAX */ ++ ++#ifndef DEFINE_ENUM_FLAG_OPERATORS ++#ifdef __cplusplus ++# define DEFINE_ENUM_FLAG_OPERATORS(type) \ ++extern "C++" \ ++{ \ ++ inline type operator &(type x, type y) { return (type)((int)x & (int)y); } \ ++ inline type operator &=(type &x, type y) { return (type &)((int &)x &= (int)y); } \ ++ inline type operator ~(type x) { return (type)~(int)x; } \ ++ inline type operator |(type x, type y) { return (type)((int)x | (int)y); } \ ++ inline type operator |=(type &x, type y) { return (type &)((int &)x |= (int)y); } \ ++ inline type operator ^(type x, type y) { return (type)((int)x ^ (int)y); } \ ++ inline type operator ^=(type &x, type y) { return (type &)((int &)x ^= (int)y); } \ ++} ++#else ++# define DEFINE_ENUM_FLAG_OPERATORS(type) ++#endif ++#endif /* DEFINE_ENUM_FLAG_OPERATORS */ ++ ++#endif /* _INC_WINDOWS */ ++#endif /* __VKD3D_WINDOWS_H */ +diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c +index 30205088b1b..ce00e536d39 100644 +--- a/libs/vkd3d/libs/vkd3d-common/blob.c ++++ b/libs/vkd3d/libs/vkd3d-common/blob.c +@@ -17,6 +17,7 @@ + */ + + #define COBJMACROS ++ + #include "vkd3d.h" + #include "vkd3d_blob.h" + #include "vkd3d_debug.h" +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index f0c386f1b3a..2c5108095d5 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ 
b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -1374,7 +1374,7 @@ static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, + if (mmask & VKD3DSPDM_PARTIALPRECISION) shader_addline(buffer, "_pp"); + if (mmask & VKD3DSPDM_MSAMPCENTROID) shader_addline(buffer, "_centroid"); + +- mmask &= ~(VKD3DSPDM_SATURATE | VKD3DSPDM_PARTIALPRECISION | VKD3DSPDM_MSAMPCENTROID); ++ mmask &= ~VKD3DSPDM_MASK; + if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask); + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 1fd5ab2446d..0d2b8d248d1 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -261,7 +261,7 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = + {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3}, + {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2}, + /* Declarations */ +- {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, ++ {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL}, + /* Constant definitions */ + {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, +@@ -328,7 +328,7 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = + {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3}, + {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2}, + /* Declarations */ +- {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, ++ {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL}, + /* Constant definitions */ + {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, +@@ -853,6 +853,14 @@ static void shader_sm1_skip_opcode(const struct vkd3d_shader_sm1_parser *sm1, co + return; + } + ++ /* DCL instructions do not have sources or destinations, but they ++ * read two tokens to a semantic. See ++ * shader_sm1_read_semantic(). 
*/ ++ if (opcode_info->vkd3d_opcode == VKD3DSIH_DCL) ++ { ++ *ptr += 2; ++ } ++ + *ptr += (opcode_info->dst_count + opcode_info->src_count); + } + +@@ -1090,7 +1098,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str + goto fail; + } + +- ins->handler_idx = opcode_info->vkd3d_opcode; ++ vsir_instruction_init(ins, &sm1->p.location, opcode_info->vkd3d_opcode); + ins->flags = (opcode_token & VKD3D_SM1_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; + ins->coissue = opcode_token & VKD3D_SM1_COISSUE; + ins->raw = false; +@@ -1333,12 +1341,22 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi + ++instructions->count; + } + +- *parser = &sm1->p; +- + for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) + sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); + +- return sm1->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; ++ if (!sm1->p.failed) ++ vsir_validate(&sm1->p); ++ ++ if (sm1->p.failed) ++ { ++ WARN("Failed to parse shader.\n"); ++ shader_sm1_destroy(&sm1->p); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ *parser = &sm1->p; ++ ++ return VKD3D_OK; + } + + bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index b78c78d34a7..b778f6abed3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -1999,7 +1999,7 @@ static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_pa + { + struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); + assert(ins); +- shader_instruction_init(ins, handler_idx); ++ vsir_instruction_init(ins, &sm6->p.location, handler_idx); + ++sm6->p.instructions.count; + return ins; + } +@@ -2190,7 +2190,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, struct sm6_b + return; + } + 
+- shader_instruction_init(ins, VKD3DSIH_MOV); ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + + if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6))) + return; +@@ -2955,6 +2955,12 @@ int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compi + compile_info->source_name, message_context); + vkd3d_free(byte_code); + ++ if (!sm6->p.failed && ret >= 0) ++ vsir_validate(&sm6->p); ++ ++ if (sm6->p.failed && ret >= 0) ++ ret = VKD3D_ERROR_INVALID_SHADER; ++ + if (ret < 0) + { + WARN("Failed to initialise shader parser.\n"); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 743a746f2bf..2cde5d58eba 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -21,7 +21,7 @@ + #define __VKD3D_SHADER_HLSL_H + + #include "vkd3d_shader_private.h" +-#include "wine/rbtree.h" ++#include "rbtree.h" + #include "d3dcommon.h" + #include "d3dx9shader.h" + +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index be024842164..ed31efc3f0b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -2523,36 +2523,19 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) + { +- struct hlsl_block then_block, else_block; +- struct hlsl_ir_node *iff, *store; +- struct hlsl_ir_load *load; +- struct hlsl_ir_var *var; ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; ++ struct hlsl_ir_node *cond; + + assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); + +- if (!(var = hlsl_new_synthetic_var(ctx, "conditional", if_true->data_type, &condition->loc))) +- return NULL; +- +- hlsl_block_init(&then_block); +- 
hlsl_block_init(&else_block); +- +- if (!(store = hlsl_new_simple_store(ctx, var, if_true))) +- return NULL; +- hlsl_block_add_instr(&then_block, store); +- +- if (!(store = hlsl_new_simple_store(ctx, var, if_false))) +- return NULL; +- hlsl_block_add_instr(&else_block, store); +- +- if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) +- return NULL; +- hlsl_block_add_instr(instrs, iff); +- +- if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) +- return NULL; +- hlsl_block_add_instr(instrs, &load->node); ++ operands[0] = condition; ++ operands[1] = if_true; ++ operands[2] = if_false; ++ if (!(cond = hlsl_new_expr(ctx, HLSL_OP3_TERNARY, operands, if_true->data_type, &condition->loc))) ++ return false; ++ hlsl_block_add_instr(instrs, cond); + +- return &load->node; ++ return cond; + } + + static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +index 41a72ab6c0d..cff0ba31efb 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +@@ -63,6 +63,56 @@ static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + return true; + } + ++static uint32_t float_to_uint(float x) ++{ ++ if (isnan(x) || x <= 0) ++ return 0; ++ ++ if (x >= 4294967296.0f) ++ return UINT32_MAX; ++ ++ return x; ++} ++ ++static int32_t float_to_int(float x) ++{ ++ if (isnan(x)) ++ return 0; ++ ++ if (x <= -2147483648.0f) ++ return INT32_MIN; ++ ++ if (x >= 2147483648.0f) ++ return INT32_MAX; ++ ++ return x; ++} ++ ++static uint32_t double_to_uint(double x) ++{ ++ if (isnan(x) || x <= 0) ++ return 0; ++ ++ if (x >= 4294967296.0) ++ return UINT32_MAX; ++ ++ return x; ++} ++ ++static int32_t double_to_int(double x) ++{ ++ if (isnan(x)) ++ return 0; ++ ++ if (x <= -2147483648.0) ++ return INT32_MIN; ++ ++ if (x >= 
2147483648.0) ++ return INT32_MAX; ++ ++ return x; ++} ++ + static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { +@@ -86,15 +136,15 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +- u = src->value.u[k].f; +- i = src->value.u[k].f; ++ u = float_to_uint(src->value.u[k].f); ++ i = float_to_int(src->value.u[k].f); + f = src->value.u[k].f; + d = src->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: +- u = src->value.u[k].d; +- i = src->value.u[k].d; ++ u = double_to_uint(src->value.u[k].d); ++ i = double_to_int(src->value.u[k].d); + f = src->value.u[k].d; + d = src->value.u[k].d; + break; +@@ -152,6 +202,59 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + return true; + } + ++static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, ++ const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = exp2f(src->value.u[k].f); ++ break; ++ ++ default: ++ FIXME("Fold 'exp2' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, ++ const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ float i; ++ ++ assert(type == src->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = modff(src->value.u[k].f, &i); ++ break; ++ ++ default: ++ FIXME("Fold 'fract' for 
type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) + { +@@ -276,6 +379,32 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + return true; + } + ++static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = min(max(0.0f, src->value.u[k].f), 1.0f); ++ break; ++ ++ default: ++ FIXME("Fold 'sat' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) + { +@@ -869,6 +998,40 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + return true; + } + ++static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) ++{ ++ unsigned int k; ++ ++ assert(dst_type->base_type == src2->node.data_type->base_type); ++ assert(dst_type->base_type == src3->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (src1->node.data_type->base_type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k] = src1->value.u[k].f != 0.0f ? 
src2->value.u[k] : src3->value.u[k]; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k] = src1->value.u[k].d != 0.0 ? src2->value.u[k] : src3->value.u[k]; ++ break; ++ ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ } ++ return true; ++} ++ + bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; +@@ -912,6 +1075,14 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + success = fold_cast(ctx, &res, instr->data_type, arg1); + break; + ++ case HLSL_OP1_EXP2: ++ success = fold_exp2(ctx, &res, instr->data_type, arg1); ++ break; ++ ++ case HLSL_OP1_FRACT: ++ success = fold_fract(ctx, &res, instr->data_type, arg1); ++ break; ++ + case HLSL_OP1_LOG2: + success = fold_log2(ctx, &res, instr->data_type, arg1, &instr->loc); + break; +@@ -924,6 +1095,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + success = fold_rcp(ctx, &res, instr->data_type, arg1, &instr->loc); + break; + ++ case HLSL_OP1_SAT: ++ success = fold_sat(ctx, &res, instr->data_type, arg1); ++ break; ++ + case HLSL_OP1_SQRT: + success = fold_sqrt(ctx, &res, instr->data_type, arg1, &instr->loc); + break; +@@ -990,6 +1165,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); + break; + ++ case HLSL_OP3_TERNARY: ++ success = fold_ternary(ctx, &res, instr->data_type, arg1, arg2, arg3); ++ break; ++ + default: + FIXME("Fold \"%s\" expression.\n", debug_hlsl_expr_op(expr->op)); + success = false; +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index d2bfb933edc..9ee38ffee37 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ 
b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -31,11 +31,9 @@ static bool shader_instruction_is_dcl(const struct vkd3d_shader_instruction *ins + + static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) + { +- ins->handler_idx = VKD3DSIH_NOP; +- ins->dst_count = 0; +- ins->src_count = 0; +- ins->dst = NULL; +- ins->src = NULL; ++ struct vkd3d_shader_location location = ins->location; ++ ++ vsir_instruction_init(ins, &location, VKD3DSIH_NOP); + } + + static void shader_register_eliminate_phase_addressing(struct vkd3d_shader_register *reg, +@@ -161,6 +159,7 @@ struct hull_flattener + unsigned int instance_count; + unsigned int phase_body_idx; + enum vkd3d_shader_opcode phase; ++ struct vkd3d_shader_location last_ret_location; + }; + + static bool flattener_is_in_fork_or_join_phase(const struct hull_flattener *flattener) +@@ -233,6 +232,7 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal + + if (ins->handler_idx == VKD3DSIH_RET) + { ++ normaliser->last_ret_location = ins->location; + vkd3d_shader_instruction_make_nop(ins); + if (locations->count >= ARRAY_SIZE(locations->locations)) + { +@@ -313,9 +313,11 @@ void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_r + reg->immconst_type = VKD3D_IMMCONST_SCALAR; + } + +-void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) ++void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, ++ enum vkd3d_shader_opcode handler_idx) + { + memset(ins, 0, sizeof(*ins)); ++ ins->location = *location; + ins->handler_idx = handler_idx; + } + +@@ -343,7 +345,7 @@ static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd + + if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- shader_instruction_init(&instructions->elements[instructions->count++], 
VKD3DSIH_RET); ++ vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET); + } + + *src_instructions = flattener.instructions; +@@ -404,7 +406,8 @@ static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const + } + + static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser, +- const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst) ++ const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst, ++ const struct vkd3d_shader_location *location) + { + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_dst_param *param; +@@ -422,7 +425,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p + normaliser->instructions.count += count; + + ins = &normaliser->instructions.elements[dst]; +- shader_instruction_init(ins, VKD3DSIH_HS_CONTROL_POINT_PHASE); ++ vsir_instruction_init(ins, location, VKD3DSIH_HS_CONTROL_POINT_PHASE); + ins->flags = 1; + ++ins; + +@@ -434,13 +437,13 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p + + if (e->sysval_semantic != VKD3D_SHADER_SV_NONE) + { +- shader_instruction_init(ins, VKD3DSIH_DCL_INPUT_SIV); ++ vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT_SIV); + param = &ins->declaration.register_semantic.reg; + ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); + } + else + { +- shader_instruction_init(ins, VKD3DSIH_DCL_INPUT); ++ vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT); + param = &ins->declaration.dst; + } + +@@ -511,7 +514,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + ret = control_point_normaliser_emit_hs_input(&normaliser, input_signature, +- input_control_point_count, i); ++ input_control_point_count, i, 
&ins->location); + *src_instructions = normaliser.instructions; + return ret; + default: +@@ -1014,7 +1017,6 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + struct io_normaliser *normaliser) + { + struct vkd3d_shader_register *reg; +- bool keep = true; + unsigned int i; + + switch (ins->handler_idx) +@@ -1033,15 +1035,16 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + /* fall through */ + case VKD3DSIH_DCL_INPUT_PS: + case VKD3DSIH_DCL_OUTPUT: +- keep = shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser); ++ if (!shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser)) ++ vkd3d_shader_instruction_make_nop(ins); + break; + case VKD3DSIH_DCL_INPUT_SGV: + case VKD3DSIH_DCL_INPUT_SIV: + case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_PS_SIV: + case VKD3DSIH_DCL_OUTPUT_SIV: +- keep = shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, +- normaliser); ++ if (!shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, normaliser)) ++ vkd3d_shader_instruction_make_nop(ins); + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: +@@ -1060,9 +1063,6 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + shader_src_param_io_normalise((struct vkd3d_shader_src_param *)&ins->src[i], normaliser); + break; + } +- +- if (!keep) +- shader_instruction_init(ins, VKD3DSIH_NOP); + } + + static enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, +@@ -1290,5 +1290,117 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + if (result >= 0 && TRACE_ON()) + vkd3d_shader_trace(instructions, &parser->shader_version); + ++ if (result >= 0 && !parser->failed) ++ vsir_validate(parser); ++ ++ if (result >= 0 && parser->failed) ++ result = VKD3D_ERROR_INVALID_SHADER; ++ + return result; + } ++ 
++struct validation_context ++{ ++ struct vkd3d_shader_parser *parser; ++ size_t instruction_idx; ++}; ++ ++static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, ++ enum vkd3d_shader_error error, const char *format, ...) ++{ ++ struct vkd3d_string_buffer buf; ++ va_list args; ++ ++ vkd3d_string_buffer_init(&buf); ++ ++ va_start(args, format); ++ vkd3d_string_buffer_vprintf(&buf, format, args); ++ va_end(args); ++ ++ vkd3d_shader_parser_error(ctx->parser, error, "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); ++ ++ vkd3d_string_buffer_cleanup(&buf); ++} ++ ++static void vsir_validate_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ if (reg->type >= VKD3DSPR_COUNT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register type %#x.", ++ reg->type); ++} ++ ++static void vsir_validate_dst_param(struct validation_context *ctx, ++ const struct vkd3d_shader_dst_param *dst) ++{ ++ vsir_validate_register(ctx, &dst->reg); ++ ++ if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination has invalid write mask %#x.", ++ dst->write_mask); ++ ++ if (dst->modifiers & ~VKD3DSPDM_MASK) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.", ++ dst->modifiers); ++ ++ switch (dst->shift) ++ { ++ case 0: ++ case 1: ++ case 2: ++ case 3: ++ case 13: ++ case 14: ++ case 15: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.", ++ dst->shift); ++ } ++} ++ ++static void vsir_validate_src_param(struct validation_context *ctx, ++ const struct vkd3d_shader_src_param *src) ++{ ++ vsir_validate_register(ctx, &src->reg); ++ ++ if (src->swizzle & ~0x03030303u) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.", ++ src->swizzle); ++ ++ if 
(src->modifiers >= VKD3DSPSM_COUNT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.", ++ src->modifiers); ++} ++ ++static void vsir_validate_instruction(struct validation_context *ctx) ++{ ++ const struct vkd3d_shader_instruction *instruction = &ctx->parser->instructions.elements[ctx->instruction_idx]; ++ size_t i; ++ ++ ctx->parser->location = instruction->location; ++ ++ for (i = 0; i < instruction->dst_count; ++i) ++ vsir_validate_dst_param(ctx, &instruction->dst[i]); ++ ++ for (i = 0; i < instruction->src_count; ++i) ++ vsir_validate_src_param(ctx, &instruction->src[i]); ++ ++ if (instruction->handler_idx >= VKD3DSIH_INVALID) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.", ++ instruction->handler_idx); ++ } ++} ++ ++void vsir_validate(struct vkd3d_shader_parser *parser) ++{ ++ struct validation_context ctx = { .parser = parser }; ++ ++ if (!(parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) ++ return; ++ ++ for (ctx.instruction_idx = 0; ctx.instruction_idx < parser->instructions.count; ++ctx.instruction_idx) ++ vsir_validate_instruction(&ctx); ++} +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h +index 4860cf5f90e..e1cb75e177c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.h ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h +@@ -22,7 +22,7 @@ + #define __VKD3D_SHADER_PREPROC_H + + #include "vkd3d_shader_private.h" +-#include "wine/rbtree.h" ++#include "rbtree.h" + + struct preproc_if_state + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 638764b81bc..8285b56a17c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -18,7 +18,7 @@ + */ + + #include "vkd3d_shader_private.h" +-#include "wine/rbtree.h" ++#include "rbtree.h" + + #include + #include +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c 
b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index 045fb6c5f64..58b7f030dac 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -2327,7 +2327,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + return; + } + +- ins->handler_idx = opcode_info->handler_idx; ++ vsir_instruction_init(ins, &sm4->p.location, opcode_info->handler_idx); + if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE + || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) + sm4->phase = ins->handler_idx; +@@ -2642,9 +2642,19 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + if (sm4->p.shader_version.type == VKD3D_SHADER_TYPE_HULL && !sm4->has_control_point_phase && !sm4->p.failed) + shader_sm4_validate_default_phase_index_ranges(sm4); + ++ if (!sm4->p.failed) ++ vsir_validate(&sm4->p); ++ ++ if (sm4->p.failed) ++ { ++ WARN("Failed to parse shader.\n"); ++ shader_sm4_destroy(&sm4->p); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ + *parser = &sm4->p; + +- return sm4->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; ++ return VKD3D_OK; + } + + static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index f25dbb04d69..077d0144bc5 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -22,6 +22,8 @@ + #include + #include + ++/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ ++ + static inline int char_to_int(char c) + { + if ('0' <= c && c <= '9') +@@ -452,6 +454,25 @@ static void init_scan_signature_info(const struct vkd3d_shader_compile_info *inf + } + } + ++static const struct vkd3d_debug_option vkd3d_shader_config_options[] = ++{ ++ {"force_validation", VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION}, /* force validation of internal shader representations */ ++}; ++ ++static uint64_t vkd3d_shader_init_config_flags(void) ++{ ++ uint64_t config_flags; ++ const char *config; ++ ++ config = getenv("VKD3D_SHADER_CONFIG"); ++ config_flags = vkd3d_parse_debug_options(config, vkd3d_shader_config_options, ARRAY_SIZE(vkd3d_shader_config_options)); ++ ++ if (config_flags) ++ TRACE("VKD3D_SHADER_CONFIG='%s'.\n", config); ++ ++ return config_flags; ++} ++ + bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, + struct vkd3d_shader_message_context *message_context, const char *source_name, + const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, +@@ -463,6 +484,7 @@ bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, + parser->location.column = 0; + parser->shader_version = *version; + parser->ops = ops; ++ parser->config_flags = vkd3d_shader_init_config_flags(); + return shader_instruction_array_init(&parser->instructions, instruction_reserve); + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 
9443df6c232..5fd930918be 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -49,7 +49,7 @@ + #include "vkd3d_common.h" + #include "vkd3d_memory.h" + #include "vkd3d_shader.h" +-#include "wine/list.h" ++#include "list.h" + + #include + #include +@@ -183,6 +183,12 @@ enum vkd3d_shader_error + VKD3D_SHADER_WARNING_DXIL_UNHANDLED_INTRINSIC = 8305, + + VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED = 9000, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER = 9001, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE = 9002, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK = 9003, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS = 9004, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT = 9005, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE = 9006, + }; + + enum vkd3d_shader_opcode +@@ -585,6 +591,7 @@ enum vkd3d_shader_src_modifier + VKD3DSPSM_ABS = 11, + VKD3DSPSM_ABSNEG = 12, + VKD3DSPSM_NOT = 13, ++ VKD3DSPSM_COUNT, + }; + + #define VKD3DSP_WRITEMASK_0 0x1u /* .x r */ +@@ -599,6 +606,7 @@ enum vkd3d_shader_dst_modifier + VKD3DSPDM_SATURATE = 1, + VKD3DSPDM_PARTIALPRECISION = 2, + VKD3DSPDM_MSAMPCENTROID = 4, ++ VKD3DSPDM_MASK = 7, + }; + + enum vkd3d_shader_interpolation_mode +@@ -968,8 +976,15 @@ struct vkd3d_shader_primitive_type + unsigned int patch_vertex_count; + }; + ++struct vkd3d_shader_location ++{ ++ const char *source_name; ++ unsigned int line, column; ++}; ++ + struct vkd3d_shader_instruction + { ++ struct vkd3d_shader_location location; + enum vkd3d_shader_opcode handler_idx; + DWORD flags; + unsigned int dst_count; +@@ -1008,7 +1023,8 @@ struct vkd3d_shader_instruction + } declaration; + }; + +-void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx); ++void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, ++ enum vkd3d_shader_opcode handler_idx); + + static inline bool 
vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) + { +@@ -1035,12 +1051,6 @@ static inline bool register_is_constant(const struct vkd3d_shader_register *reg) + return (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_IMMCONST64); + } + +-struct vkd3d_shader_location +-{ +- const char *source_name; +- unsigned int line, column; +-}; +- + struct vkd3d_shader_param_node + { + struct vkd3d_shader_param_node *next; +@@ -1093,6 +1103,11 @@ bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_ + unsigned int dst, unsigned int src); + void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); + ++enum vkd3d_shader_config_flags ++{ ++ VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION = 0x00000001, ++}; ++ + struct vkd3d_shader_parser + { + struct vkd3d_shader_message_context *message_context; +@@ -1103,6 +1118,8 @@ struct vkd3d_shader_parser + struct vkd3d_shader_version shader_version; + const struct vkd3d_shader_parser_ops *ops; + struct vkd3d_shader_instruction_array instructions; ++ ++ uint64_t config_flags; + }; + + struct vkd3d_shader_parser_ops +@@ -1291,6 +1308,8 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, + int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + ++void vsir_validate(struct vkd3d_shader_parser *parser); ++ + static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_type( + enum vkd3d_data_type data_type) + { +diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c +index 42a98763438..3be45120d8c 100644 +--- a/libs/vkd3d/libs/vkd3d/command.c ++++ b/libs/vkd3d/libs/vkd3d/command.c +@@ -5461,6 +5461,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID + view_desc.miplevel_count = 1; + 
view_desc.layer_idx = view->info.texture.layer_idx; + view_desc.layer_count = view->info.texture.layer_count; ++ view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + + if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, + &uint_view)) +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index c33061073a3..69727e09cc7 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -2456,17 +2456,18 @@ static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cach + } + + /* ID3D12Device */ +-static inline struct d3d12_device *impl_from_ID3D12Device1(ID3D12Device1 *iface) ++static inline struct d3d12_device *impl_from_ID3D12Device2(ID3D12Device2 *iface) + { +- return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device1_iface); ++ return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device2_iface); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device2 *iface, + REFIID riid, void **object) + { + TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); + +- if (IsEqualGUID(riid, &IID_ID3D12Device1) ++ if (IsEqualGUID(riid, &IID_ID3D12Device2) ++ || IsEqualGUID(riid, &IID_ID3D12Device1) + || IsEqualGUID(riid, &IID_ID3D12Device) + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) +@@ -2482,9 +2483,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device1 *ifac + return E_NOINTERFACE; + } + +-static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device1 *iface) ++static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device2 *iface) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + ULONG refcount = InterlockedIncrement(&device->refcount); + + TRACE("%p increasing 
refcount to %u.\n", device, refcount); +@@ -2492,9 +2493,9 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device1 *iface) + return refcount; + } + +-static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device1 *iface) ++static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device2 *iface) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + ULONG refcount = InterlockedDecrement(&device->refcount); + + TRACE("%p decreasing refcount to %u.\n", device, refcount); +@@ -2528,10 +2529,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device1 *iface) + return refcount; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device2 *iface, + REFGUID guid, UINT *data_size, void *data) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", + iface, debugstr_guid(guid), data_size, data); +@@ -2539,10 +2540,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device1 *ifac + return vkd3d_get_private_data(&device->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device2 *iface, + REFGUID guid, UINT data_size, const void *data) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", + iface, debugstr_guid(guid), data_size, data); +@@ -2550,19 +2551,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device1 *ifac + return vkd3d_set_private_data(&device->private_store, guid, data_size, data); + } + +-static 
HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device2 *iface, + REFGUID guid, const IUnknown *data) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&device->private_store, guid, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device1 *iface, const WCHAR *name) ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device2 *iface, const WCHAR *name) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size)); + +@@ -2570,17 +2571,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device1 *iface, cons + VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, name); + } + +-static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device1 *iface) ++static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device2 *iface) + { + TRACE("iface %p.\n", iface); + + return 1; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device2 *iface, + const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_command_queue *object; + HRESULT hr; + +@@ -2594,10 +2595,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device1 * + riid, command_queue); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device1 *iface, ++static HRESULT 
STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device2 *iface, + D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_command_allocator *object; + HRESULT hr; + +@@ -2611,10 +2612,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic + riid, command_allocator); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device2 *iface, + const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_pipeline_state *object; + HRESULT hr; + +@@ -2628,10 +2629,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 + &IID_ID3D12PipelineState, riid, pipeline_state); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device2 *iface, + const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_pipeline_state *object; + HRESULT hr; + +@@ -2645,11 +2646,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D + &IID_ID3D12PipelineState, riid, pipeline_state); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device2 *iface, + UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator 
*command_allocator, + ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_command_list *object; + HRESULT hr; + +@@ -2772,10 +2773,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) + return true; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device2 *iface, + D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + + TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", + iface, feature, feature_data, feature_data_size); +@@ -3274,10 +3275,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device1 + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device2 *iface, + const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_descriptor_heap *object; + HRESULT hr; + +@@ -3291,7 +3292,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device1 + &IID_ID3D12DescriptorHeap, riid, descriptor_heap); + } + +-static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device1 *iface, ++static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device2 *iface, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) + { + TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); +@@ -3314,11 
+3315,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device2 *iface, + UINT node_mask, const void *bytecode, SIZE_T bytecode_length, + REFIID riid, void **root_signature) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_root_signature *object; + HRESULT hr; + +@@ -3334,10 +3335,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device1 + &IID_ID3D12RootSignature, riid, root_signature); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device1 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device2 *iface, + const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); +@@ -3346,11 +3347,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device1 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device2 *iface, + ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", +@@ -3360,11 +3361,11 @@ 
static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device1 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device2 *iface, + ID3D12Resource *resource, ID3D12Resource *counter_resource, + const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %#lx.\n", +@@ -3375,7 +3376,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device1 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device2 *iface, + ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +@@ -3383,10 +3384,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device1 + iface, resource, desc, descriptor.ptr); + + d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), +- impl_from_ID3D12Device1(iface), unsafe_impl_from_ID3D12Resource(resource), desc); ++ impl_from_ID3D12Device2(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device1 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device2 *iface, + ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, + D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +@@ -3394,13 +3395,13 @@ static void STDMETHODCALLTYPE 
d3d12_device_CreateDepthStencilView(ID3D12Device1 + iface, resource, desc, descriptor.ptr); + + d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), +- impl_from_ID3D12Device1(iface), unsafe_impl_from_ID3D12Resource(resource), desc); ++ impl_from_ID3D12Device2(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + } + +-static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device1 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device2 *iface, + const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_desc tmp = {0}; + + TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr); +@@ -3409,14 +3410,14 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device1 *iface, + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device1 *iface, ++static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device2 *iface, + UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, + const UINT *dst_descriptor_range_sizes, + UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, + const UINT *src_descriptor_range_sizes, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; + unsigned int dst_range_size, src_range_size; + struct d3d12_descriptor_heap *dst_heap; +@@ -3472,7 +3473,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device1 *iface, + } + } + +-static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device1 
*iface, ++static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device2 *iface, + UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, + const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) +@@ -3487,10 +3488,10 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device1 * + } + + static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( +- ID3D12Device1 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, ++ ID3D12Device2 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, + UINT count, const D3D12_RESOURCE_DESC *resource_descs) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + const D3D12_RESOURCE_DESC *desc; + uint64_t requested_alignment; + +@@ -3563,10 +3564,10 @@ invalid: + return info; + } + +-static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device1 *iface, ++static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device2 *iface, + D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE heap_type) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + bool coherent; + + TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", +@@ -3606,12 +3607,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope + return heap_properties; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device2 *iface, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES 
initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_resource *object; + HRESULT hr; + +@@ -3630,10 +3631,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi + return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device2 *iface, + const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_heap *object; + HRESULT hr; + +@@ -3649,12 +3650,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device1 *iface, + return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device2 *iface, + ID3D12Heap *heap, UINT64 heap_offset, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_heap *heap_object; + struct d3d12_resource *object; + HRESULT hr; +@@ -3673,11 +3674,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device1 + return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateReservedResource(ID3D12Device2 *iface, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_resource *object; + HRESULT hr; + +@@ -3691,11 +3692,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic + return return_interface(&object->ID3D12Resource_iface, &IID_ID3D12Resource, iid, resource); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device2 *iface, + ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, + const WCHAR *name, HANDLE *handle) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + + FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", + iface, object, attributes, access, debugstr_w(name, device->wchar_size), handle); +@@ -3703,7 +3704,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device1 * + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device2 *iface, + HANDLE handle, REFIID riid, void **object) + { + FIXME("iface %p, handle %p, riid %s, object %p stub!\n", +@@ -3712,10 +3713,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device1 *if + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device2 *iface, + const WCHAR *name, DWORD access, HANDLE *handle) + { +- struct 
d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + + FIXME("iface %p, name %s, access %#x, handle %p stub!\n", + iface, debugstr_w(name, device->wchar_size), access, handle); +@@ -3723,7 +3724,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device2 *iface, + UINT object_count, ID3D12Pageable * const *objects) + { + FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", +@@ -3732,7 +3733,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device1 *iface, + return S_OK; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device2 *iface, + UINT object_count, ID3D12Pageable * const *objects) + { + FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", +@@ -3741,10 +3742,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device1 *iface, + return S_OK; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device2 *iface, + UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_fence *object; + HRESULT hr; + +@@ -3757,21 +3758,21 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device1 *iface, + return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device1 *iface) ++static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device2 *iface) + { +- struct 
d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + + TRACE("iface %p.\n", iface); + + return device->removed_reason; + } + +-static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device1 *iface, ++static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device2 *iface, + const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, + UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, + UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + + unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; + unsigned int width, height, depth, plane_count, sub_resources_per_plane; +@@ -3851,10 +3852,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device1 * + *total_bytes = total; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device2 *iface, + const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_query_heap *object; + HRESULT hr; + +@@ -3867,18 +3868,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device1 *ifa + return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device1 *iface, BOOL enable) ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device2 *iface, BOOL enable) + { + FIXME("iface %p, enable %#x stub!\n", iface, enable); + + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateCommandSignature(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device2 *iface, + const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, + REFIID iid, void **command_signature) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + struct d3d12_command_signature *object; + HRESULT hr; + +@@ -3892,14 +3893,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic + &IID_ID3D12CommandSignature, iid, command_signature); + } + +-static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device1 *iface, ++static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device2 *iface, + ID3D12Resource *resource, UINT *total_tile_count, + D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE *standard_tile_shape, + UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, + D3D12_SUBRESOURCE_TILING *sub_resource_tilings) + { + const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + + TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " + "standard_title_shape %p, sub_resource_tiling_count %p, " +@@ -3912,9 +3913,9 @@ static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device1 *ifac + sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); + } + +-static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device1 *iface, LUID *luid) ++static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device2 *iface, LUID *luid) + { +- struct d3d12_device *device = impl_from_ID3D12Device1(iface); ++ struct d3d12_device *device = impl_from_ID3D12Device2(iface); + + TRACE("iface %p, luid %p.\n", iface, luid); + +@@ -3923,7 +3924,7 @@ 
static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device1 *iface + return luid; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device2 *iface, + const void *blob, SIZE_T blob_size, REFIID iid, void **lib) + { + FIXME("iface %p, blob %p, blob_size %lu, iid %s, lib %p stub!\n", iface, blob, blob_size, debugstr_guid(iid), lib); +@@ -3931,7 +3932,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device + return DXGI_ERROR_UNSUPPORTED; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device2 *iface, + ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, + D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) + { +@@ -3941,7 +3942,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion( + return E_NOTIMPL; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device1 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device2 *iface, + UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) + { + FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); +@@ -3949,7 +3950,15 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device1 + return S_OK; + } + +-static const struct ID3D12Device1Vtbl d3d12_device_vtbl = ++static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device2 *iface, ++ const D3D12_PIPELINE_STATE_STREAM_DESC *desc, REFIID iid, void **pipeline_state) ++{ ++ FIXME("iface %p, desc %p, iid %s, pipeline_state %p stub!\n", iface, desc, debugstr_guid(iid), pipeline_state); ++ ++ return E_NOTIMPL; ++} ++ 
++static const struct ID3D12Device2Vtbl d3d12_device_vtbl = + { + /* IUnknown methods */ + d3d12_device_QueryInterface, +@@ -4002,14 +4011,16 @@ static const struct ID3D12Device1Vtbl d3d12_device_vtbl = + d3d12_device_CreatePipelineLibrary, + d3d12_device_SetEventOnMultipleFenceCompletion, + d3d12_device_SetResidencyPriority, ++ /* ID3D12Device2 methods */ ++ d3d12_device_CreatePipelineState, + }; + +-struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface) ++struct d3d12_device *unsafe_impl_from_ID3D12Device2(ID3D12Device2 *iface) + { + if (!iface) + return NULL; + assert(iface->lpVtbl == &d3d12_device_vtbl); +- return impl_from_ID3D12Device1(iface); ++ return impl_from_ID3D12Device2(iface); + } + + static HRESULT d3d12_device_init(struct d3d12_device *device, +@@ -4018,7 +4029,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + const struct vkd3d_vk_device_procs *vk_procs; + HRESULT hr; + +- device->ID3D12Device1_iface.lpVtbl = &d3d12_device_vtbl; ++ device->ID3D12Device2_iface.lpVtbl = &d3d12_device_vtbl; + device->refcount = 1; + + vkd3d_instance_incref(device->vkd3d_instance = instance); +@@ -4215,28 +4226,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha + + IUnknown *vkd3d_get_device_parent(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device2((ID3D12Device2 *)device); + + return d3d12_device->parent; + } + + VkDevice vkd3d_get_vk_device(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device2((ID3D12Device2 *)device); + + return d3d12_device->vk_device; + } + + VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); ++ struct d3d12_device 
*d3d12_device = impl_from_ID3D12Device2((ID3D12Device2 *)device); + + return d3d12_device->vk_physical_device; + } + + struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) + { +- struct d3d12_device *d3d12_device = impl_from_ID3D12Device1((ID3D12Device1 *)device); ++ struct d3d12_device *d3d12_device = impl_from_ID3D12Device2((ID3D12Device2 *)device); + + return d3d12_device->vkd3d_instance; + } +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index f3842958d96..0dfb4d379ca 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -2220,7 +2220,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, + HRESULT vkd3d_create_image_resource(ID3D12Device *device, + const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) + { +- struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device1((ID3D12Device1 *)device); ++ struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device2((ID3D12Device2 *)device); + struct d3d12_resource *object; + HRESULT hr; + +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c +index 159560afd8e..7651acc3820 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c +@@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, + + if (!device) + { +- ID3D12Device_Release(&object->ID3D12Device1_iface); ++ ID3D12Device_Release(&object->ID3D12Device2_iface); + return S_FALSE; + } + +- return return_interface(&object->ID3D12Device1_iface, &IID_ID3D12Device, iid, device); ++ return return_interface(&object->ID3D12Device2_iface, &IID_ID3D12Device, iid, device); + } + + /* ID3D12RootSignatureDeserializer */ +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index 363a7132c3a..fceb06fc05a 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ 
b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -31,8 +31,8 @@ + #include "vkd3d_blob.h" + #include "vkd3d_memory.h" + #include "vkd3d_utf8.h" +-#include "wine/list.h" +-#include "wine/rbtree.h" ++#include "list.h" ++#include "rbtree.h" + + #include "vkd3d.h" + #include "vkd3d_shader.h" +@@ -1709,7 +1709,7 @@ struct vkd3d_desc_object_cache + /* ID3D12Device */ + struct d3d12_device + { +- ID3D12Device1 ID3D12Device1_iface; ++ ID3D12Device2 ID3D12Device2_iface; + LONG refcount; + + VkDevice vk_device; +@@ -1775,27 +1775,27 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 + bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); + void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, + const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); +-struct d3d12_device *unsafe_impl_from_ID3D12Device1(ID3D12Device1 *iface); ++struct d3d12_device *unsafe_impl_from_ID3D12Device2(ID3D12Device2 *iface); + + static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) + { +- return ID3D12Device1_QueryInterface(&device->ID3D12Device1_iface, iid, object); ++ return ID3D12Device2_QueryInterface(&device->ID3D12Device2_iface, iid, object); + } + + static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) + { +- return ID3D12Device1_AddRef(&device->ID3D12Device1_iface); ++ return ID3D12Device2_AddRef(&device->ID3D12Device2_iface); + } + + static inline ULONG d3d12_device_release(struct d3d12_device *device) + { +- return ID3D12Device1_Release(&device->ID3D12Device1_iface); ++ return ID3D12Device2_Release(&device->ID3D12Device2_iface); + } + + static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, + D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) + { +- return ID3D12Device1_GetDescriptorHandleIncrementSize(&device->ID3D12Device1_iface, descriptor_type); ++ return 
ID3D12Device2_GetDescriptorHandleIncrementSize(&device->ID3D12Device2_iface, descriptor_type); + } + + /* utils */ +-- +2.40.1 + diff --git a/staging/upstream-commit b/staging/upstream-commit index 96845d6c..5b707e9a 100644 --- a/staging/upstream-commit +++ b/staging/upstream-commit @@ -1 +1 @@ -ee17400c05d88fa29d0b895fa01902adfc91ba7f +56e58cbed19bb5307598d4f433d5b3f01af91a1b