diff --git a/patches/vkd3d-latest/0001-wined3d-rename-shader_extract_from_dxbc-to-wined3d_s.patch b/patches/vkd3d-latest/0001-wined3d-rename-shader_extract_from_dxbc-to-wined3d_s.patch
new file mode 100644
index 00000000..3cd8988b
--- /dev/null
+++ b/patches/vkd3d-latest/0001-wined3d-rename-shader_extract_from_dxbc-to-wined3d_s.patch
@@ -0,0 +1,55 @@
+From b349a2915750a5de22339a8bd12ec10ad35d61da Mon Sep 17 00:00:00 2001
+From: Alistair Leslie-Hughes
+Date: Wed, 17 May 2023 08:13:47 +1000
+Subject: [PATCH 1/2] wined3d: rename shader_extract_from_dxbc to
+ wined3d_shader_extract_from_dxbc
+
+Stops a duplicate-name linker error when using upstream vkd3d.
+---
+ dlls/wined3d/shader.c          | 2 +-
+ dlls/wined3d/shader_sm4.c      | 2 +-
+ dlls/wined3d/wined3d_private.h | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c
+index b4df79508ec..2728929045f 100644
+--- a/dlls/wined3d/shader.c
++++ b/dlls/wined3d/shader.c
+@@ -2481,7 +2481,7 @@ static HRESULT shader_init(struct wined3d_shader *shader, struct wined3d_device
+     shader->byte_code_size = desc->byte_code_size;
+
+     max_version = shader_max_version_from_feature_level(device->cs->c.state->feature_level);
+-    if (FAILED(hr = shader_extract_from_dxbc(shader, max_version, &source_type)))
++    if (FAILED(hr = wined3d_shader_extract_from_dxbc(shader, max_version, &source_type)))
+         goto fail;
+
+     if (!(shader->frontend = shader_select_frontend(source_type)))
+diff --git a/dlls/wined3d/shader_sm4.c b/dlls/wined3d/shader_sm4.c
+index 78abdbe81ac..74391cc1be8 100644
+--- a/dlls/wined3d/shader_sm4.c
++++ b/dlls/wined3d/shader_sm4.c
+@@ -2077,7 +2077,7 @@ static HRESULT shader_dxbc_process_section(struct wined3d_shader *shader, unsign
+     return S_OK;
+ }
+
+-HRESULT shader_extract_from_dxbc(struct wined3d_shader *shader,
++HRESULT wined3d_shader_extract_from_dxbc(struct wined3d_shader *shader,
+         unsigned int max_shader_version, enum vkd3d_shader_source_type *source_type)
+ {
+     const struct vkd3d_shader_code dxbc = {.code = shader->byte_code, .size = shader->byte_code_size};
+diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
+index a586d6847bf..001534e8c38 100644
+--- a/dlls/wined3d/wined3d_private.h
++++ b/dlls/wined3d/wined3d_private.h
+@@ -1466,7 +1466,7 @@ struct wined3d_shader_frontend
+ extern const struct wined3d_shader_frontend sm1_shader_frontend DECLSPEC_HIDDEN;
+ extern const struct wined3d_shader_frontend sm4_shader_frontend DECLSPEC_HIDDEN;
+
+-HRESULT shader_extract_from_dxbc(struct wined3d_shader *shader,
++HRESULT wined3d_shader_extract_from_dxbc(struct wined3d_shader *shader,
+         unsigned int max_shader_version, enum vkd3d_shader_source_type *source_type) DECLSPEC_HIDDEN;
+ BOOL shader_get_stream_output_register_info(const struct wined3d_shader *shader,
+         const struct wined3d_stream_output_element *so_element, unsigned int *register_idx,
+-- 
+2.40.1
+
diff --git a/patches/vkd3d-latest/0002-Update-vkd3d-to-bb680e73de4ac22700ec89b1f466eea8da0a.patch b/patches/vkd3d-latest/0002-Update-vkd3d-to-bb680e73de4ac22700ec89b1f466eea8da0a.patch
new file mode 100644
index 00000000..7ebe4988
--- /dev/null
+++ b/patches/vkd3d-latest/0002-Update-vkd3d-to-bb680e73de4ac22700ec89b1f466eea8da0a.patch
@@ -0,0 +1,29840 @@
+From d87f229d13f4b43a24a30f61fe55c6783e2b395d Mon Sep 17 00:00:00 2001
+From: Alistair Leslie-Hughes
+Date: Wed, 17 May 2023 08:35:40 +1000
+Subject: [PATCH 2/2] Update vkd3d to bb680e73de4ac22700ec89b1f466eea8da0a2120
+
+---
+ libs/vkd3d/Makefile.in | 6 +-
+ 
libs/vkd3d/include/list.h | 270 + + libs/vkd3d/include/private/list.h | 270 + + libs/vkd3d/include/private/rbtree.h | 378 ++ + libs/vkd3d/include/private/vkd3d_common.h | 3 +- + libs/vkd3d/include/private/vkd3d_debug.h | 2 +- + libs/vkd3d/include/private/vkd3d_test.h | 432 ++ + libs/vkd3d/include/vkd3d_d3d9types.h | 237 + + libs/vkd3d/include/vkd3d_d3dcompiler.h | 74 + + libs/vkd3d/include/vkd3d_utils.h | 108 + + libs/vkd3d/include/vkd3d_windows.h | 284 + + libs/vkd3d/libs/vkd3d-common/blob.c | 1 + + libs/vkd3d/libs/vkd3d-common/debug.c | 4 +- + .../libs/vkd3d-shader/{trace.c => d3d_asm.c} | 23 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1121 +++- + libs/vkd3d/libs/vkd3d-shader/dxbc.c | 1773 +----- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 551 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 220 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.l | 8 + + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 2316 +++++--- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1524 +++-- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 289 +- + libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c | 980 ---- + libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c | 2531 -------- + libs/vkd3d/libs/vkd3d-shader/ir.c | 1073 ++++ + libs/vkd3d/libs/vkd3d-shader/preproc.h | 2 +- + libs/vkd3d/libs/vkd3d-shader/preproc.l | 7 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 1311 ++--- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 5219 +++++++++++++++++ + .../libs/vkd3d-shader/vkd3d_shader_main.c | 241 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 91 +- + libs/vkd3d/libs/vkd3d/command.c | 160 +- + libs/vkd3d/libs/vkd3d/device.c | 205 +- + libs/vkd3d/libs/vkd3d/resource.c | 935 ++- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 238 +- + 35 files changed, 14192 insertions(+), 8695 deletions(-) + create mode 100644 libs/vkd3d/include/list.h + create mode 100644 libs/vkd3d/include/private/list.h + create mode 100644 libs/vkd3d/include/private/rbtree.h + create mode 100644 libs/vkd3d/include/private/vkd3d_test.h + create mode 100644 libs/vkd3d/include/vkd3d_d3d9types.h + create mode 100644 libs/vkd3d/include/vkd3d_d3dcompiler.h + create mode 100644 libs/vkd3d/include/vkd3d_utils.h + create mode 100644 libs/vkd3d/include/vkd3d_windows.h + rename libs/vkd3d/libs/vkd3d-shader/{trace.c => d3d_asm.c} (98%) + delete mode 100644 libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c + delete mode 100644 libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c + create mode 100644 libs/vkd3d/libs/vkd3d-shader/ir.c + create mode 100644 libs/vkd3d/libs/vkd3d-shader/tpf.c + +diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in +index 0ed4e27ad83..1ba0e9f71e1 100644 +--- a/libs/vkd3d/Makefile.in ++++ b/libs/vkd3d/Makefile.in +@@ -14,6 +14,7 @@ SOURCES = \ + libs/vkd3d-common/memory.c \ + libs/vkd3d-common/utf8.c \ + libs/vkd3d-shader/checksum.c \ ++ libs/vkd3d-shader/d3d_asm.c \ + libs/vkd3d-shader/d3dbc.c \ + libs/vkd3d-shader/dxbc.c \ + libs/vkd3d-shader/glsl.c \ +@@ -22,12 +23,11 @@ SOURCES = \ + libs/vkd3d-shader/hlsl.y \ + libs/vkd3d-shader/hlsl_codegen.c \ + libs/vkd3d-shader/hlsl_constant_ops.c \ +- libs/vkd3d-shader/hlsl_sm1.c \ +- libs/vkd3d-shader/hlsl_sm4.c \ ++ libs/vkd3d-shader/ir.c \ + libs/vkd3d-shader/preproc.l \ + libs/vkd3d-shader/preproc.y \ + libs/vkd3d-shader/spirv.c \ +- libs/vkd3d-shader/trace.c \ ++ libs/vkd3d-shader/tpf.c \ + libs/vkd3d-shader/vkd3d_shader_main.c \ + libs/vkd3d/command.c \ + libs/vkd3d/device.c \ +diff --git a/libs/vkd3d/include/list.h b/libs/vkd3d/include/list.h +new file mode 100644 +index 00000000000..2e1d95f3fd4 +--- /dev/null ++++ b/libs/vkd3d/include/list.h +@@ -0,0 +1,270 @@ 
++/*
++ * Linked lists support
++ *
++ * Copyright (C) 2002 Alexandre Julliard
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
++ */
++
++#ifndef __WINE_SERVER_LIST_H
++#define __WINE_SERVER_LIST_H
++
++#include <stddef.h>
++
++struct list
++{
++    struct list *next;
++    struct list *prev;
++};
++
++/* Define a list like so:
++ *
++ *   struct gadget
++ *   {
++ *       struct list entry;   <-- doesn't have to be the first item in the struct
++ *       int a, b;
++ *   };
++ *
++ *   static struct list global_gadgets = LIST_INIT( global_gadgets );
++ *
++ * or
++ *
++ *   struct some_global_thing
++ *   {
++ *       struct list gadgets;
++ *   };
++ *
++ *   list_init( &some_global_thing->gadgets );
++ *
++ * Manipulate it like this:
++ *
++ *   list_add_head( &global_gadgets, &new_gadget->entry );
++ *   list_remove( &new_gadget->entry );
++ *   list_add_after( &some_random_gadget->entry, &new_gadget->entry );
++ *
++ * And to iterate over it:
++ *
++ *   struct gadget *gadget;
++ *   LIST_FOR_EACH_ENTRY( gadget, &global_gadgets, struct gadget, entry )
++ *   {
++ *       ...
++ *   }
++ *
++ */
++
++/* add an element after the specified one */
++static inline void list_add_after( struct list *elem, struct list *to_add )
++{
++    to_add->next = elem->next;
++    to_add->prev = elem;
++    elem->next->prev = to_add;
++    elem->next = to_add;
++}
++
++/* add an element before the specified one */
++static inline void list_add_before( struct list *elem, struct list *to_add )
++{
++    to_add->next = elem;
++    to_add->prev = elem->prev;
++    elem->prev->next = to_add;
++    elem->prev = to_add;
++}
++
++/* add element at the head of the list */
++static inline void list_add_head( struct list *list, struct list *elem )
++{
++    list_add_after( list, elem );
++}
++
++/* add element at the tail of the list */
++static inline void list_add_tail( struct list *list, struct list *elem )
++{
++    list_add_before( list, elem );
++}
++
++/* remove an element from its list */
++static inline void list_remove( struct list *elem )
++{
++    elem->next->prev = elem->prev;
++    elem->prev->next = elem->next;
++}
++
++/* get the next element */
++static inline struct list *list_next( const struct list *list, const struct list *elem )
++{
++    struct list *ret = elem->next;
++    if (elem->next == list) ret = NULL;
++    return ret;
++}
++
++/* get the previous element */
++static inline struct list *list_prev( const struct list *list, const struct list *elem )
++{
++    struct list *ret = elem->prev;
++    if (elem->prev == list) ret = NULL;
++    return ret;
++}
++
++/* get the first element */
++static inline struct list *list_head( const struct list *list )
++{
++    return list_next( list, list );
++}
++
++/* get the last element */
++static inline struct list *list_tail( const struct list *list )
++{
++    return list_prev( list, list );
++}
++
++/* check if a list is empty */
++static inline int list_empty( const
struct list *list ) ++{ ++ return list->next == list; ++} ++ ++/* initialize a list */ ++static inline void list_init( struct list *list ) ++{ ++ list->next = list->prev = list; ++} ++ ++/* count the elements of a list */ ++static inline unsigned int list_count( const struct list *list ) ++{ ++ unsigned count = 0; ++ const struct list *ptr; ++ for (ptr = list->next; ptr != list; ptr = ptr->next) count++; ++ return count; ++} ++ ++/* move all elements from src to before the specified element */ ++static inline void list_move_before( struct list *dst, struct list *src ) ++{ ++ if (list_empty(src)) return; ++ ++ dst->prev->next = src->next; ++ src->next->prev = dst->prev; ++ dst->prev = src->prev; ++ src->prev->next = dst; ++ list_init(src); ++} ++ ++/* move all elements from src to after the specified element */ ++static inline void list_move_after( struct list *dst, struct list *src ) ++{ ++ if (list_empty(src)) return; ++ ++ dst->next->prev = src->prev; ++ src->prev->next = dst->next; ++ dst->next = src->next; ++ src->next->prev = dst; ++ list_init(src); ++} ++ ++/* move all elements from src to the head of dst */ ++static inline void list_move_head( struct list *dst, struct list *src ) ++{ ++ list_move_after( dst, src ); ++} ++ ++/* move all elements from src to the tail of dst */ ++static inline void list_move_tail( struct list *dst, struct list *src ) ++{ ++ list_move_before( dst, src ); ++} ++ ++/* move the slice of elements from begin to end inclusive to the head of dst */ ++static inline void list_move_slice_head( struct list *dst, struct list *begin, struct list *end ) ++{ ++ struct list *dst_next = dst->next; ++ begin->prev->next = end->next; ++ end->next->prev = begin->prev; ++ dst->next = begin; ++ dst_next->prev = end; ++ begin->prev = dst; ++ end->next = dst_next; ++} ++ ++/* move the slice of elements from begin to end inclusive to the tail of dst */ ++static inline void list_move_slice_tail( struct list *dst, struct list *begin, struct list *end ) ++{ ++ struct list *dst_prev = dst->prev; ++ begin->prev->next = end->next; ++ end->next->prev = begin->prev; ++ dst_prev->next = begin; ++ dst->prev = end; ++ begin->prev = dst_prev; ++ end->next = dst; ++} ++ ++/* iterate through the list */ ++#define LIST_FOR_EACH(cursor,list) \ ++ for ((cursor) = (list)->next; (cursor) != (list); (cursor) = (cursor)->next) ++ ++/* iterate through the list, with safety against removal */ ++#define LIST_FOR_EACH_SAFE(cursor, cursor2, list) \ ++ for ((cursor) = (list)->next, (cursor2) = (cursor)->next; \ ++ (cursor) != (list); \ ++ (cursor) = (cursor2), (cursor2) = (cursor)->next) ++ ++/* iterate through the list using a list entry */ ++#define LIST_FOR_EACH_ENTRY(elem, list, type, field) \ ++ for ((elem) = LIST_ENTRY((list)->next, type, field); \ ++ &(elem)->field != (list); \ ++ (elem) = LIST_ENTRY((elem)->field.next, type, field)) ++ ++/* iterate through the list using a list entry, with safety against removal */ ++#define LIST_FOR_EACH_ENTRY_SAFE(cursor, cursor2, list, type, field) \ ++ for ((cursor) = LIST_ENTRY((list)->next, type, field), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.next, type, field); \ ++ &(cursor)->field != (list); \ ++ (cursor) = (cursor2), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.next, type, field)) ++ ++/* iterate through the list in reverse order */ ++#define LIST_FOR_EACH_REV(cursor,list) \ ++ for ((cursor) = (list)->prev; (cursor) != (list); (cursor) = (cursor)->prev) ++ ++/* iterate through the list in reverse order, with safety against removal */ ++#define 
LIST_FOR_EACH_SAFE_REV(cursor, cursor2, list) \
++    for ((cursor) = (list)->prev, (cursor2) = (cursor)->prev; \
++         (cursor) != (list); \
++         (cursor) = (cursor2), (cursor2) = (cursor)->prev)
++
++/* iterate through the list in reverse order using a list entry */
++#define LIST_FOR_EACH_ENTRY_REV(elem, list, type, field) \
++    for ((elem) = LIST_ENTRY((list)->prev, type, field); \
++         &(elem)->field != (list); \
++         (elem) = LIST_ENTRY((elem)->field.prev, type, field))
++
++/* iterate through the list in reverse order using a list entry, with safety against removal */
++#define LIST_FOR_EACH_ENTRY_SAFE_REV(cursor, cursor2, list, type, field) \
++    for ((cursor) = LIST_ENTRY((list)->prev, type, field), \
++         (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field); \
++         &(cursor)->field != (list); \
++         (cursor) = (cursor2), \
++         (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field))
++
++/* macros for statically initialized lists */
++#undef LIST_INIT
++#define LIST_INIT(list) { &(list), &(list) }
++
++/* get pointer to object containing list element */
++#undef LIST_ENTRY
++#define LIST_ENTRY(elem, type, field) \
++    ((type *)((char *)(elem) - offsetof(type, field)))
++
++#endif /* __WINE_SERVER_LIST_H */
+diff --git a/libs/vkd3d/include/private/list.h b/libs/vkd3d/include/private/list.h
+new file mode 100644
+index 00000000000..2e1d95f3fd4
+--- /dev/null
++++ b/libs/vkd3d/include/private/list.h
+@@ -0,0 +1,270 @@
++/*
++ * Linked lists support
++ *
++ * Copyright (C) 2002 Alexandre Julliard
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
++ */
++
++#ifndef __WINE_SERVER_LIST_H
++#define __WINE_SERVER_LIST_H
++
++#include <stddef.h>
++
++struct list
++{
++    struct list *next;
++    struct list *prev;
++};
++
++/* Define a list like so:
++ *
++ *   struct gadget
++ *   {
++ *       struct list entry;   <-- doesn't have to be the first item in the struct
++ *       int a, b;
++ *   };
++ *
++ *   static struct list global_gadgets = LIST_INIT( global_gadgets );
++ *
++ * or
++ *
++ *   struct some_global_thing
++ *   {
++ *       struct list gadgets;
++ *   };
++ *
++ *   list_init( &some_global_thing->gadgets );
++ *
++ * Manipulate it like this:
++ *
++ *   list_add_head( &global_gadgets, &new_gadget->entry );
++ *   list_remove( &new_gadget->entry );
++ *   list_add_after( &some_random_gadget->entry, &new_gadget->entry );
++ *
++ * And to iterate over it:
++ *
++ *   struct gadget *gadget;
++ *   LIST_FOR_EACH_ENTRY( gadget, &global_gadgets, struct gadget, entry )
++ *   {
++ *       ...
++ * } ++ * ++ */ ++ ++/* add an element after the specified one */ ++static inline void list_add_after( struct list *elem, struct list *to_add ) ++{ ++ to_add->next = elem->next; ++ to_add->prev = elem; ++ elem->next->prev = to_add; ++ elem->next = to_add; ++} ++ ++/* add an element before the specified one */ ++static inline void list_add_before( struct list *elem, struct list *to_add ) ++{ ++ to_add->next = elem; ++ to_add->prev = elem->prev; ++ elem->prev->next = to_add; ++ elem->prev = to_add; ++} ++ ++/* add element at the head of the list */ ++static inline void list_add_head( struct list *list, struct list *elem ) ++{ ++ list_add_after( list, elem ); ++} ++ ++/* add element at the tail of the list */ ++static inline void list_add_tail( struct list *list, struct list *elem ) ++{ ++ list_add_before( list, elem ); ++} ++ ++/* remove an element from its list */ ++static inline void list_remove( struct list *elem ) ++{ ++ elem->next->prev = elem->prev; ++ elem->prev->next = elem->next; ++} ++ ++/* get the next element */ ++static inline struct list *list_next( const struct list *list, const struct list *elem ) ++{ ++ struct list *ret = elem->next; ++ if (elem->next == list) ret = NULL; ++ return ret; ++} ++ ++/* get the previous element */ ++static inline struct list *list_prev( const struct list *list, const struct list *elem ) ++{ ++ struct list *ret = elem->prev; ++ if (elem->prev == list) ret = NULL; ++ return ret; ++} ++ ++/* get the first element */ ++static inline struct list *list_head( const struct list *list ) ++{ ++ return list_next( list, list ); ++} ++ ++/* get the last element */ ++static inline struct list *list_tail( const struct list *list ) ++{ ++ return list_prev( list, list ); ++} ++ ++/* check if a list is empty */ ++static inline int list_empty( const struct list *list ) ++{ ++ return list->next == list; ++} ++ ++/* initialize a list */ ++static inline void list_init( struct list *list ) ++{ ++ list->next = list->prev = list; ++} ++ ++/* count the elements of a list */ ++static inline unsigned int list_count( const struct list *list ) ++{ ++ unsigned count = 0; ++ const struct list *ptr; ++ for (ptr = list->next; ptr != list; ptr = ptr->next) count++; ++ return count; ++} ++ ++/* move all elements from src to before the specified element */ ++static inline void list_move_before( struct list *dst, struct list *src ) ++{ ++ if (list_empty(src)) return; ++ ++ dst->prev->next = src->next; ++ src->next->prev = dst->prev; ++ dst->prev = src->prev; ++ src->prev->next = dst; ++ list_init(src); ++} ++ ++/* move all elements from src to after the specified element */ ++static inline void list_move_after( struct list *dst, struct list *src ) ++{ ++ if (list_empty(src)) return; ++ ++ dst->next->prev = src->prev; ++ src->prev->next = dst->next; ++ dst->next = src->next; ++ src->next->prev = dst; ++ list_init(src); ++} ++ ++/* move all elements from src to the head of dst */ ++static inline void list_move_head( struct list *dst, struct list *src ) ++{ ++ list_move_after( dst, src ); ++} ++ ++/* move all elements from src to the tail of dst */ ++static inline void list_move_tail( struct list *dst, struct list *src ) ++{ ++ list_move_before( dst, src ); ++} ++ ++/* move the slice of elements from begin to end inclusive to the head of dst */ ++static inline void list_move_slice_head( struct list *dst, struct list *begin, struct list *end ) ++{ ++ struct list *dst_next = dst->next; ++ begin->prev->next = end->next; ++ end->next->prev = begin->prev; ++ dst->next = begin; ++ 
dst_next->prev = end; ++ begin->prev = dst; ++ end->next = dst_next; ++} ++ ++/* move the slice of elements from begin to end inclusive to the tail of dst */ ++static inline void list_move_slice_tail( struct list *dst, struct list *begin, struct list *end ) ++{ ++ struct list *dst_prev = dst->prev; ++ begin->prev->next = end->next; ++ end->next->prev = begin->prev; ++ dst_prev->next = begin; ++ dst->prev = end; ++ begin->prev = dst_prev; ++ end->next = dst; ++} ++ ++/* iterate through the list */ ++#define LIST_FOR_EACH(cursor,list) \ ++ for ((cursor) = (list)->next; (cursor) != (list); (cursor) = (cursor)->next) ++ ++/* iterate through the list, with safety against removal */ ++#define LIST_FOR_EACH_SAFE(cursor, cursor2, list) \ ++ for ((cursor) = (list)->next, (cursor2) = (cursor)->next; \ ++ (cursor) != (list); \ ++ (cursor) = (cursor2), (cursor2) = (cursor)->next) ++ ++/* iterate through the list using a list entry */ ++#define LIST_FOR_EACH_ENTRY(elem, list, type, field) \ ++ for ((elem) = LIST_ENTRY((list)->next, type, field); \ ++ &(elem)->field != (list); \ ++ (elem) = LIST_ENTRY((elem)->field.next, type, field)) ++ ++/* iterate through the list using a list entry, with safety against removal */ ++#define LIST_FOR_EACH_ENTRY_SAFE(cursor, cursor2, list, type, field) \ ++ for ((cursor) = LIST_ENTRY((list)->next, type, field), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.next, type, field); \ ++ &(cursor)->field != (list); \ ++ (cursor) = (cursor2), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.next, type, field)) ++ ++/* iterate through the list in reverse order */ ++#define LIST_FOR_EACH_REV(cursor,list) \ ++ for ((cursor) = (list)->prev; (cursor) != (list); (cursor) = (cursor)->prev) ++ ++/* iterate through the list in reverse order, with safety against removal */ ++#define LIST_FOR_EACH_SAFE_REV(cursor, cursor2, list) \ ++ for ((cursor) = (list)->prev, (cursor2) = (cursor)->prev; \ ++ (cursor) != (list); \ ++ (cursor) = (cursor2), (cursor2) = (cursor)->prev) ++ ++/* iterate through the list in reverse order using a list entry */ ++#define LIST_FOR_EACH_ENTRY_REV(elem, list, type, field) \ ++ for ((elem) = LIST_ENTRY((list)->prev, type, field); \ ++ &(elem)->field != (list); \ ++ (elem) = LIST_ENTRY((elem)->field.prev, type, field)) ++ ++/* iterate through the list in reverse order using a list entry, with safety against removal */ ++#define LIST_FOR_EACH_ENTRY_SAFE_REV(cursor, cursor2, list, type, field) \ ++ for ((cursor) = LIST_ENTRY((list)->prev, type, field), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field); \ ++ &(cursor)->field != (list); \ ++ (cursor) = (cursor2), \ ++ (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field)) ++ ++/* macros for statically initialized lists */ ++#undef LIST_INIT ++#define LIST_INIT(list) { &(list), &(list) } ++ ++/* get pointer to object containing list element */ ++#undef LIST_ENTRY ++#define LIST_ENTRY(elem, type, field) \ ++ ((type *)((char *)(elem) - offsetof(type, field))) ++ ++#endif /* __WINE_SERVER_LIST_H */ +diff --git a/libs/vkd3d/include/private/rbtree.h b/libs/vkd3d/include/private/rbtree.h +new file mode 100644 +index 00000000000..b5d38bca54c +--- /dev/null ++++ b/libs/vkd3d/include/private/rbtree.h +@@ -0,0 +1,378 @@ ++/* ++ * Red-black search tree support ++ * ++ * Copyright 2009 Henri Verbeet ++ * Copyright 2009 Andrew Riedi ++ * Copyright 2016 Jacek Caban for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * 
License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __WINE_WINE_RBTREE_H ++#define __WINE_WINE_RBTREE_H ++ ++#define RB_ENTRY_VALUE(element, type, field) \ ++ ((type *)((char *)(element) - offsetof(type, field))) ++ ++struct rb_entry ++{ ++ struct rb_entry *parent; ++ struct rb_entry *left; ++ struct rb_entry *right; ++ unsigned int flags; ++}; ++ ++typedef int (*rb_compare_func)(const void *key, const struct rb_entry *entry); ++ ++struct rb_tree ++{ ++ rb_compare_func compare; ++ struct rb_entry *root; ++}; ++ ++typedef void (rb_traverse_func)(struct rb_entry *entry, void *context); ++ ++#define RB_FLAG_RED 0x1 ++ ++static inline int rb_is_red(struct rb_entry *entry) ++{ ++ return entry && (entry->flags & RB_FLAG_RED); ++} ++ ++static inline void rb_rotate_left(struct rb_tree *tree, struct rb_entry *e) ++{ ++ struct rb_entry *right = e->right; ++ ++ if (!e->parent) ++ tree->root = right; ++ else if (e->parent->left == e) ++ e->parent->left = right; ++ else ++ e->parent->right = right; ++ ++ e->right = right->left; ++ if (e->right) e->right->parent = e; ++ right->left = e; ++ right->parent = e->parent; ++ e->parent = right; ++} ++ ++static inline void rb_rotate_right(struct rb_tree *tree, struct rb_entry *e) ++{ ++ struct rb_entry *left = e->left; ++ ++ if (!e->parent) ++ tree->root = left; ++ else if (e->parent->left == e) ++ e->parent->left = left; ++ else ++ e->parent->right = left; ++ ++ e->left = left->right; ++ if (e->left) e->left->parent = e; ++ left->right = e; ++ left->parent = e->parent; ++ e->parent = left; ++} ++ ++static inline void rb_flip_color(struct rb_entry *entry) ++{ ++ entry->flags ^= RB_FLAG_RED; ++ entry->left->flags ^= RB_FLAG_RED; ++ entry->right->flags ^= RB_FLAG_RED; ++} ++ ++static inline struct rb_entry *rb_head(struct rb_entry *iter) ++{ ++ if (!iter) return NULL; ++ while (iter->left) iter = iter->left; ++ return iter; ++} ++ ++static inline struct rb_entry *rb_next(struct rb_entry *iter) ++{ ++ if (iter->right) return rb_head(iter->right); ++ while (iter->parent && iter->parent->right == iter) iter = iter->parent; ++ return iter->parent; ++} ++ ++static inline struct rb_entry *rb_postorder_head(struct rb_entry *iter) ++{ ++ if (!iter) return NULL; ++ ++ for (;;) { ++ while (iter->left) iter = iter->left; ++ if (!iter->right) return iter; ++ iter = iter->right; ++ } ++} ++ ++static inline struct rb_entry *rb_postorder_next(struct rb_entry *iter) ++{ ++ if (!iter->parent) return NULL; ++ if (iter == iter->parent->right || !iter->parent->right) return iter->parent; ++ return rb_postorder_head(iter->parent->right); ++} ++ ++/* iterate through the tree */ ++#define RB_FOR_EACH(cursor, tree) \ ++ for ((cursor) = rb_head((tree)->root); (cursor); (cursor) = rb_next(cursor)) ++ ++/* iterate through the tree using a tree entry */ ++#define RB_FOR_EACH_ENTRY(elem, tree, type, field) \ ++ for ((elem) = RB_ENTRY_VALUE(rb_head((tree)->root), type, field); \ ++ (elem) != RB_ENTRY_VALUE(0, 
type, field); \ ++ (elem) = RB_ENTRY_VALUE(rb_next(&elem->field), type, field)) ++ ++/* iterate through the tree using using postorder, making it safe to free the entry */ ++#define RB_FOR_EACH_DESTRUCTOR(cursor, cursor2, tree) \ ++ for ((cursor) = rb_postorder_head((tree)->root); \ ++ (cursor) && (((cursor2) = rb_postorder_next(cursor)) || 1); \ ++ (cursor) = (cursor2)) ++ ++/* iterate through the tree using a tree entry and postorder, making it safe to free the entry */ ++#define RB_FOR_EACH_ENTRY_DESTRUCTOR(elem, elem2, tree, type, field) \ ++ for ((elem) = RB_ENTRY_VALUE(rb_postorder_head((tree)->root), type, field); \ ++ (elem) != WINE_RB_ENTRY_VALUE(0, type, field) \ ++ && (((elem2) = RB_ENTRY_VALUE(rb_postorder_next(&(elem)->field), type, field)) || 1); \ ++ (elem) = (elem2)) ++ ++ ++static inline void rb_postorder(struct rb_tree *tree, rb_traverse_func *callback, void *context) ++{ ++ struct rb_entry *iter, *next; ++ RB_FOR_EACH_DESTRUCTOR(iter, next, tree) callback(iter, context); ++} ++ ++static inline void rb_init(struct rb_tree *tree, rb_compare_func compare) ++{ ++ tree->compare = compare; ++ tree->root = NULL; ++} ++ ++static inline void rb_for_each_entry(struct rb_tree *tree, rb_traverse_func *callback, void *context) ++{ ++ struct rb_entry *iter; ++ RB_FOR_EACH(iter, tree) callback(iter, context); ++} ++ ++static inline void rb_clear(struct rb_tree *tree, rb_traverse_func *callback, void *context) ++{ ++ /* Note that we use postorder here because the callback will likely free the entry. */ ++ if (callback) rb_postorder(tree, callback, context); ++ tree->root = NULL; ++} ++ ++static inline void rb_destroy(struct rb_tree *tree, rb_traverse_func *callback, void *context) ++{ ++ rb_clear(tree, callback, context); ++} ++ ++static inline struct rb_entry *rb_get(const struct rb_tree *tree, const void *key) ++{ ++ struct rb_entry *entry = tree->root; ++ while (entry) ++ { ++ int c = tree->compare(key, entry); ++ if (!c) return entry; ++ entry = c < 0 ? 
entry->left : entry->right; ++ } ++ return NULL; ++} ++ ++static inline int rb_put(struct rb_tree *tree, const void *key, struct rb_entry *entry) ++{ ++ struct rb_entry **iter = &tree->root, *parent = tree->root; ++ ++ while (*iter) ++ { ++ int c; ++ ++ parent = *iter; ++ c = tree->compare(key, parent); ++ if (!c) return -1; ++ else if (c < 0) iter = &parent->left; ++ else iter = &parent->right; ++ } ++ ++ entry->flags = RB_FLAG_RED; ++ entry->parent = parent; ++ entry->left = NULL; ++ entry->right = NULL; ++ *iter = entry; ++ ++ while (rb_is_red(entry->parent)) ++ { ++ if (entry->parent == entry->parent->parent->left) ++ { ++ if (rb_is_red(entry->parent->parent->right)) ++ { ++ rb_flip_color(entry->parent->parent); ++ entry = entry->parent->parent; ++ } ++ else ++ { ++ if (entry == entry->parent->right) ++ { ++ entry = entry->parent; ++ rb_rotate_left(tree, entry); ++ } ++ entry->parent->flags &= ~RB_FLAG_RED; ++ entry->parent->parent->flags |= RB_FLAG_RED; ++ rb_rotate_right(tree, entry->parent->parent); ++ } ++ } ++ else ++ { ++ if (rb_is_red(entry->parent->parent->left)) ++ { ++ rb_flip_color(entry->parent->parent); ++ entry = entry->parent->parent; ++ } ++ else ++ { ++ if (entry == entry->parent->left) ++ { ++ entry = entry->parent; ++ rb_rotate_right(tree, entry); ++ } ++ entry->parent->flags &= ~RB_FLAG_RED; ++ entry->parent->parent->flags |= RB_FLAG_RED; ++ rb_rotate_left(tree, entry->parent->parent); ++ } ++ } ++ } ++ ++ tree->root->flags &= ~RB_FLAG_RED; ++ ++ return 0; ++} ++ ++static inline void rb_remove(struct rb_tree *tree, struct rb_entry *entry) ++{ ++ struct rb_entry *iter, *child, *parent, *w; ++ int need_fixup; ++ ++ if (entry->right && entry->left) ++ for(iter = entry->right; iter->left; iter = iter->left); ++ else ++ iter = entry; ++ ++ child = iter->left ? 
iter->left : iter->right; ++ ++ if (!iter->parent) ++ tree->root = child; ++ else if (iter == iter->parent->left) ++ iter->parent->left = child; ++ else ++ iter->parent->right = child; ++ ++ if (child) child->parent = iter->parent; ++ parent = iter->parent; ++ ++ need_fixup = !rb_is_red(iter); ++ ++ if (entry != iter) ++ { ++ *iter = *entry; ++ if (!iter->parent) ++ tree->root = iter; ++ else if (entry == iter->parent->left) ++ iter->parent->left = iter; ++ else ++ iter->parent->right = iter; ++ ++ if (iter->right) iter->right->parent = iter; ++ if (iter->left) iter->left->parent = iter; ++ if (parent == entry) parent = iter; ++ } ++ ++ if (need_fixup) ++ { ++ while (parent && !rb_is_red(child)) ++ { ++ if (child == parent->left) ++ { ++ w = parent->right; ++ if (rb_is_red(w)) ++ { ++ w->flags &= ~RB_FLAG_RED; ++ parent->flags |= RB_FLAG_RED; ++ rb_rotate_left(tree, parent); ++ w = parent->right; ++ } ++ if (rb_is_red(w->left) || rb_is_red(w->right)) ++ { ++ if (!rb_is_red(w->right)) ++ { ++ w->left->flags &= ~RB_FLAG_RED; ++ w->flags |= RB_FLAG_RED; ++ rb_rotate_right(tree, w); ++ w = parent->right; ++ } ++ w->flags = (w->flags & ~RB_FLAG_RED) | (parent->flags & RB_FLAG_RED); ++ parent->flags &= ~RB_FLAG_RED; ++ if (w->right) ++ w->right->flags &= ~RB_FLAG_RED; ++ rb_rotate_left(tree, parent); ++ child = NULL; ++ break; ++ } ++ } ++ else ++ { ++ w = parent->left; ++ if (rb_is_red(w)) ++ { ++ w->flags &= ~RB_FLAG_RED; ++ parent->flags |= RB_FLAG_RED; ++ rb_rotate_right(tree, parent); ++ w = parent->left; ++ } ++ if (rb_is_red(w->left) || rb_is_red(w->right)) ++ { ++ if (!rb_is_red(w->left)) ++ { ++ w->right->flags &= ~RB_FLAG_RED; ++ w->flags |= RB_FLAG_RED; ++ rb_rotate_left(tree, w); ++ w = parent->left; ++ } ++ w->flags = (w->flags & ~RB_FLAG_RED) | (parent->flags & RB_FLAG_RED); ++ parent->flags &= ~RB_FLAG_RED; ++ if (w->left) ++ w->left->flags &= ~RB_FLAG_RED; ++ rb_rotate_right(tree, parent); ++ child = NULL; ++ break; ++ } ++ } ++ w->flags |= RB_FLAG_RED; ++ child = parent; ++ parent = child->parent; ++ } ++ if (child) child->flags &= ~RB_FLAG_RED; ++ } ++ ++ if (tree->root) tree->root->flags &= ~RB_FLAG_RED; ++} ++ ++static inline void rb_remove_key(struct rb_tree *tree, const void *key) ++{ ++ struct rb_entry *entry = rb_get(tree, key); ++ if (entry) rb_remove(tree, entry); ++} ++ ++#endif /* __WINE_WINE_RBTREE_H */ +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index 3cf0422596c..1ac23b4a085 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -84,7 +84,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) + { + #ifdef _MSC_VER + return __popcnt(v); +-#elif defined(__MINGW32__) ++#elif defined(HAVE_BUILTIN_POPCOUNT) + return __builtin_popcount(v); + #else + v -= (v >> 1) & 0x55555555; +@@ -249,6 +249,7 @@ static inline LONG InterlockedDecrement(LONG volatile *x) + # else + # error "InterlockedDecrement() not implemented for this platform" + # endif ++ + #endif /* _WIN32 */ + + static inline void vkd3d_parse_version(const char *version, int *major, int *minor) +diff --git a/libs/vkd3d/include/private/vkd3d_debug.h b/libs/vkd3d/include/private/vkd3d_debug.h +index 4f6d43af12f..6708cad344f 100644 +--- a/libs/vkd3d/include/private/vkd3d_debug.h ++++ b/libs/vkd3d/include/private/vkd3d_debug.h +@@ -91,7 +91,7 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); + + #define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) + +-#define VKD3D_DEBUG_ENV_NAME(name) 
const char *vkd3d_dbg_env_name = name
++#define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name
+
+ static inline const char *debugstr_guid(const GUID *guid)
+ {
+diff --git a/libs/vkd3d/include/private/vkd3d_test.h b/libs/vkd3d/include/private/vkd3d_test.h
+new file mode 100644
+index 00000000000..081443c4fa6
+--- /dev/null
++++ b/libs/vkd3d/include/private/vkd3d_test.h
+@@ -0,0 +1,432 @@
++/*
++ * Copyright 2016 Józef Kucia for CodeWeavers
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
++ */
++
++#ifndef __VKD3D_TEST_H
++#define __VKD3D_TEST_H
++
++#include "vkd3d_common.h"
++#include <assert.h>
++#include <inttypes.h>
++#include <stdarg.h>
++#include <stdbool.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++
++extern const char *vkd3d_test_name;
++extern const char *vkd3d_test_platform;
++
++static void vkd3d_test_start_todo(bool is_todo);
++static int vkd3d_test_loop_todo(void);
++static void vkd3d_test_end_todo(void);
++
++#define START_TEST(name) \
++        const char *vkd3d_test_name = #name; \
++        static void vkd3d_test_main(int argc, char **argv)
++
++/*
++ * Use assert_that() for conditions that should always be true.
++ * todo_if() and bug_if() do not influence assert_that().
++ */
++#define assert_that assert_that_(__LINE__)
++
++#define ok ok_(__LINE__)
++
++#define skip skip_(__LINE__)
++
++#define trace trace_(__LINE__)
++
++#define assert_that_(line) \
++        do { \
++        unsigned int vkd3d_line = line; \
++        VKD3D_TEST_ASSERT_THAT
++
++#define VKD3D_TEST_ASSERT_THAT(...) \
++        vkd3d_test_assert_that(vkd3d_line, __VA_ARGS__); } while (0)
++
++#define ok_(line) \
++        do { \
++        unsigned int vkd3d_line = line; \
++        VKD3D_TEST_OK
++
++#define VKD3D_TEST_OK(...) \
++        vkd3d_test_ok(vkd3d_line, __VA_ARGS__); } while (0)
++
++#define todo_(line) \
++        do { \
++        unsigned int vkd3d_line = line; \
++        VKD3D_TEST_TODO
++
++#define VKD3D_TEST_TODO(...) \
++        vkd3d_test_todo(vkd3d_line, __VA_ARGS__); } while (0)
++
++#define skip_(line) \
++        do { \
++        unsigned int vkd3d_line = line; \
++        VKD3D_TEST_SKIP
++
++#define VKD3D_TEST_SKIP(...) \
++        vkd3d_test_skip(vkd3d_line, __VA_ARGS__); } while (0)
++
++#define trace_(line) \
++        do { \
++        unsigned int vkd3d_line = line; \
++        VKD3D_TEST_TRACE
++
++#define VKD3D_TEST_TRACE(...) 
\ ++ vkd3d_test_trace(vkd3d_line, __VA_ARGS__); } while (0) ++ ++#define todo_if(is_todo) \ ++ for (vkd3d_test_start_todo(is_todo); vkd3d_test_loop_todo(); vkd3d_test_end_todo()) ++ ++#define bug_if(is_bug) \ ++ for (vkd3d_test_start_bug(is_bug); vkd3d_test_loop_bug(); vkd3d_test_end_bug()) ++ ++#define todo todo_if(true) ++ ++struct vkd3d_test_state ++{ ++ LONG success_count; ++ LONG failure_count; ++ LONG skip_count; ++ LONG todo_count; ++ LONG todo_success_count; ++ LONG bug_count; ++ ++ unsigned int debug_level; ++ ++ unsigned int todo_level; ++ bool todo_do_loop; ++ ++ unsigned int bug_level; ++ bool bug_do_loop; ++ bool bug_enabled; ++ ++ const char *test_name_filter; ++ char context[8][128]; ++ unsigned int context_count; ++}; ++extern struct vkd3d_test_state vkd3d_test_state; ++ ++static bool ++vkd3d_test_platform_is_windows(void) ++{ ++ return !strcmp(vkd3d_test_platform, "windows"); ++} ++ ++static inline bool ++broken(bool condition) ++{ ++ return condition && vkd3d_test_platform_is_windows(); ++} ++ ++static void vkd3d_test_printf(unsigned int line, const char *msg) ++{ ++ unsigned int i; ++ ++ printf("%s:%u: ", vkd3d_test_name, line); ++ for (i = 0; i < vkd3d_test_state.context_count; ++i) ++ printf("%s: ", vkd3d_test_state.context[i]); ++ printf("%s", msg); ++} ++ ++static void ++vkd3d_test_check_assert_that(unsigned int line, bool result, const char *fmt, va_list args) ++{ ++ if (result) ++ { ++ InterlockedIncrement(&vkd3d_test_state.success_count); ++ if (vkd3d_test_state.debug_level > 1) ++ vkd3d_test_printf(line, "Test succeeded.\n"); ++ } ++ else ++ { ++ InterlockedIncrement(&vkd3d_test_state.failure_count); ++ vkd3d_test_printf(line, "Test failed: "); ++ vprintf(fmt, args); ++ } ++} ++ ++static void VKD3D_PRINTF_FUNC(3, 4) VKD3D_UNUSED ++vkd3d_test_assert_that(unsigned int line, bool result, const char *fmt, ...) ++{ ++ va_list args; ++ ++ va_start(args, fmt); ++ vkd3d_test_check_assert_that(line, result, fmt, args); ++ va_end(args); ++} ++ ++static void ++vkd3d_test_check_ok(unsigned int line, bool result, const char *fmt, va_list args) ++{ ++ bool is_todo = vkd3d_test_state.todo_level && !vkd3d_test_platform_is_windows(); ++ bool is_bug = vkd3d_test_state.bug_level && !vkd3d_test_platform_is_windows(); ++ ++ if (is_bug && vkd3d_test_state.bug_enabled) ++ { ++ InterlockedIncrement(&vkd3d_test_state.bug_count); ++ if (is_todo) ++ result = !result; ++ if (result) ++ vkd3d_test_printf(line, "Fixed bug: "); ++ else ++ vkd3d_test_printf(line, "Bug: "); ++ vprintf(fmt, args); ++ } ++ else if (is_todo) ++ { ++ if (result) ++ { ++ InterlockedIncrement(&vkd3d_test_state.todo_success_count); ++ vkd3d_test_printf(line, "Todo succeeded: "); ++ } ++ else ++ { ++ InterlockedIncrement(&vkd3d_test_state.todo_count); ++ vkd3d_test_printf(line, "Todo: "); ++ } ++ vprintf(fmt, args); ++ } ++ else ++ { ++ vkd3d_test_check_assert_that(line, result, fmt, args); ++ } ++} ++ ++static void VKD3D_PRINTF_FUNC(3, 4) VKD3D_UNUSED ++vkd3d_test_ok(unsigned int line, bool result, const char *fmt, ...) ++{ ++ va_list args; ++ ++ va_start(args, fmt); ++ vkd3d_test_check_ok(line, result, fmt, args); ++ va_end(args); ++} ++ ++static void VKD3D_PRINTF_FUNC(2, 3) VKD3D_UNUSED ++vkd3d_test_skip(unsigned int line, const char *fmt, ...) 
++{ ++ va_list args; ++ va_start(args, fmt); ++ vkd3d_test_printf(line, "Test skipped: "); ++ vprintf(fmt, args); ++ va_end(args); ++ InterlockedIncrement(&vkd3d_test_state.skip_count); ++} ++ ++static void VKD3D_PRINTF_FUNC(2, 3) VKD3D_UNUSED ++vkd3d_test_trace(unsigned int line, const char *fmt, ...) ++{ ++ va_list args; ++ va_start(args, fmt); ++ vkd3d_test_printf(line, ""); ++ vprintf(fmt, args); ++ va_end(args); ++} ++ ++static void VKD3D_PRINTF_FUNC(1, 2) VKD3D_UNUSED ++vkd3d_test_debug(const char *fmt, ...) ++{ ++ char buffer[512]; ++ va_list args; ++ int size; ++ ++ size = snprintf(buffer, sizeof(buffer), "%s: ", vkd3d_test_name); ++ if (0 < size && size < sizeof(buffer)) ++ { ++ va_start(args, fmt); ++ vsnprintf(buffer + size, sizeof(buffer) - size, fmt, args); ++ va_end(args); ++ } ++ buffer[sizeof(buffer) - 1] = '\0'; ++ ++#ifdef _WIN32 ++ OutputDebugStringA(buffer); ++#endif ++ ++ if (vkd3d_test_state.debug_level > 0) ++ printf("%s\n", buffer); ++} ++ ++#ifndef VKD3D_TEST_NO_DEFS ++const char *vkd3d_test_platform = "other"; ++struct vkd3d_test_state vkd3d_test_state; ++ ++static void vkd3d_test_main(int argc, char **argv); ++ ++int main(int argc, char **argv) ++{ ++ const char *test_filter = getenv("VKD3D_TEST_FILTER"); ++ const char *debug_level = getenv("VKD3D_TEST_DEBUG"); ++ char *test_platform = getenv("VKD3D_TEST_PLATFORM"); ++ const char *bug = getenv("VKD3D_TEST_BUG"); ++ ++ memset(&vkd3d_test_state, 0, sizeof(vkd3d_test_state)); ++ vkd3d_test_state.debug_level = debug_level ? atoi(debug_level) : 0; ++ vkd3d_test_state.bug_enabled = bug ? atoi(bug) : true; ++ vkd3d_test_state.test_name_filter = test_filter; ++ ++ if (test_platform) ++ { ++ test_platform = strdup(test_platform); ++ vkd3d_test_platform = test_platform; ++ } ++ ++ if (vkd3d_test_state.debug_level > 1) ++ printf("Test platform: '%s'.\n", vkd3d_test_platform); ++ ++ vkd3d_test_main(argc, argv); ++ ++ printf("%s: %lu tests executed (%lu failures, %lu skipped, %lu todo, %lu bugs).\n", ++ vkd3d_test_name, ++ (unsigned long)(vkd3d_test_state.success_count ++ + vkd3d_test_state.failure_count + vkd3d_test_state.todo_count ++ + vkd3d_test_state.todo_success_count), ++ (unsigned long)(vkd3d_test_state.failure_count ++ + vkd3d_test_state.todo_success_count), ++ (unsigned long)vkd3d_test_state.skip_count, ++ (unsigned long)vkd3d_test_state.todo_count, ++ (unsigned long)vkd3d_test_state.bug_count); ++ ++ if (test_platform) ++ free(test_platform); ++ ++ return vkd3d_test_state.failure_count || vkd3d_test_state.todo_success_count; ++} ++ ++#ifdef _WIN32 ++static char *vkd3d_test_strdupWtoA(WCHAR *str) ++{ ++ char *out; ++ int len; ++ ++ if (!(len = WideCharToMultiByte(CP_ACP, 0, str, -1, NULL, 0, NULL, NULL))) ++ return NULL; ++ if (!(out = malloc(len))) ++ return NULL; ++ WideCharToMultiByte(CP_ACP, 0, str, -1, out, len, NULL, NULL); ++ ++ return out; ++} ++ ++static bool running_under_wine(void) ++{ ++ HMODULE module = GetModuleHandleA("ntdll.dll"); ++ return module && GetProcAddress(module, "wine_server_call"); ++} ++ ++int wmain(int argc, WCHAR **wargv) ++{ ++ char **argv; ++ int i, ret; ++ ++ argv = malloc(argc * sizeof(*argv)); ++ assert(argv); ++ for (i = 0; i < argc; ++i) ++ { ++ if (!(argv[i] = vkd3d_test_strdupWtoA(wargv[i]))) ++ break; ++ } ++ assert(i == argc); ++ ++ vkd3d_test_platform = running_under_wine() ? 
"wine" : "windows"; ++ ++ ret = main(argc, argv); ++ ++ for (i = 0; i < argc; ++i) ++ free(argv[i]); ++ free(argv); ++ ++ return ret; ++} ++#endif /* _WIN32 */ ++#endif /* VKD3D_TEST_NO_DEFS */ ++ ++typedef void (*vkd3d_test_pfn)(void); ++ ++static inline void vkd3d_run_test(const char *name, vkd3d_test_pfn test_pfn) ++{ ++ if (vkd3d_test_state.test_name_filter && !strstr(name, vkd3d_test_state.test_name_filter)) ++ return; ++ ++ vkd3d_test_debug("%s", name); ++ test_pfn(); ++} ++ ++static inline void vkd3d_test_start_todo(bool is_todo) ++{ ++ vkd3d_test_state.todo_level = (vkd3d_test_state.todo_level << 1) | is_todo; ++ vkd3d_test_state.todo_do_loop = true; ++} ++ ++static inline int vkd3d_test_loop_todo(void) ++{ ++ bool do_loop = vkd3d_test_state.todo_do_loop; ++ vkd3d_test_state.todo_do_loop = false; ++ return do_loop; ++} ++ ++static inline void vkd3d_test_end_todo(void) ++{ ++ vkd3d_test_state.todo_level >>= 1; ++} ++ ++static inline void vkd3d_test_start_bug(bool is_bug) ++{ ++ vkd3d_test_state.bug_level = (vkd3d_test_state.bug_level << 1) | is_bug; ++ vkd3d_test_state.bug_do_loop = true; ++} ++ ++static inline int vkd3d_test_loop_bug(void) ++{ ++ bool do_loop = vkd3d_test_state.bug_do_loop; ++ vkd3d_test_state.bug_do_loop = false; ++ return do_loop; ++} ++ ++static inline void vkd3d_test_end_bug(void) ++{ ++ vkd3d_test_state.bug_level >>= 1; ++} ++ ++static inline void vkd3d_test_push_context(const char *fmt, ...) ++{ ++ va_list args; ++ ++ if (vkd3d_test_state.context_count < ARRAY_SIZE(vkd3d_test_state.context)) ++ { ++ va_start(args, fmt); ++ vsnprintf(vkd3d_test_state.context[vkd3d_test_state.context_count], ++ sizeof(vkd3d_test_state.context), fmt, args); ++ va_end(args); ++ vkd3d_test_state.context[vkd3d_test_state.context_count][sizeof(vkd3d_test_state.context[0]) - 1] = '\0'; ++ } ++ ++vkd3d_test_state.context_count; ++} ++ ++static inline void vkd3d_test_pop_context(void) ++{ ++ if (vkd3d_test_state.context_count) ++ --vkd3d_test_state.context_count; ++} ++ ++#define run_test(test_pfn) \ ++ vkd3d_run_test(#test_pfn, test_pfn) ++ ++#endif /* __VKD3D_TEST_H */ +diff --git a/libs/vkd3d/include/vkd3d_d3d9types.h b/libs/vkd3d/include/vkd3d_d3d9types.h +new file mode 100644 +index 00000000000..75d0461409d +--- /dev/null ++++ b/libs/vkd3d/include/vkd3d_d3d9types.h +@@ -0,0 +1,237 @@ ++/* ++ * Copyright 2002-2003 Jason Edmeades ++ * Copyright 2002-2003 Raphael Junqueira ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __VKD3D_D3D9TYPES_H ++#define __VKD3D_D3D9TYPES_H ++#ifndef _d3d9TYPES_H_ ++ ++#ifndef MAKEFOURCC ++#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ ++ ((DWORD)(BYTE)(ch0) | ((DWORD)(BYTE)(ch1) << 8) | \ ++ ((DWORD)(BYTE)(ch2) << 16) | ((DWORD)(BYTE)(ch3) << 24 )) ++#endif ++ ++#define D3DSI_INSTLENGTH_SHIFT 24 ++ ++#define D3DSP_DCL_USAGE_SHIFT 0 ++#define D3DSP_DCL_USAGEINDEX_SHIFT 16 ++#define D3DSP_DSTMOD_SHIFT 20 ++ ++#define D3DSP_SRCMOD_SHIFT 24 ++ ++#define D3DSP_REGTYPE_SHIFT 28 ++#define D3DSP_REGTYPE_SHIFT2 8 ++#define D3DSP_REGTYPE_MASK (0x7 << D3DSP_REGTYPE_SHIFT) ++#define D3DSP_REGTYPE_MASK2 0x00001800 ++ ++#define D3DSP_WRITEMASK_0 0x00010000 ++#define D3DSP_WRITEMASK_1 0x00020000 ++#define D3DSP_WRITEMASK_2 0x00040000 ++#define D3DSP_WRITEMASK_3 0x00080000 ++#define D3DSP_WRITEMASK_ALL 0x000f0000 ++ ++#define D3DPS_VERSION(major, minor) (0xffff0000 | ((major) << 8) | (minor)) ++#define D3DVS_VERSION(major, minor) (0xfffe0000 | ((major) << 8) | (minor)) ++ ++typedef enum _D3DDECLUSAGE ++{ ++ D3DDECLUSAGE_POSITION = 0x0, ++ D3DDECLUSAGE_BLENDWEIGHT = 0x1, ++ D3DDECLUSAGE_BLENDINDICES = 0x2, ++ D3DDECLUSAGE_NORMAL = 0x3, ++ D3DDECLUSAGE_PSIZE = 0x4, ++ D3DDECLUSAGE_TEXCOORD = 0x5, ++ D3DDECLUSAGE_TANGENT = 0x6, ++ D3DDECLUSAGE_BINORMAL = 0x7, ++ D3DDECLUSAGE_TESSFACTOR = 0x8, ++ D3DDECLUSAGE_POSITIONT = 0x9, ++ D3DDECLUSAGE_COLOR = 0xa, ++ D3DDECLUSAGE_FOG = 0xb, ++ D3DDECLUSAGE_DEPTH = 0xc, ++ D3DDECLUSAGE_SAMPLE = 0xd, ++} D3DDECLUSAGE; ++ ++typedef enum _D3DSHADER_INSTRUCTION_OPCODE_TYPE ++{ ++ D3DSIO_NOP = 0x00, ++ D3DSIO_MOV = 0x01, ++ D3DSIO_ADD = 0x02, ++ D3DSIO_SUB = 0x03, ++ D3DSIO_MAD = 0x04, ++ D3DSIO_MUL = 0x05, ++ D3DSIO_RCP = 0x06, ++ D3DSIO_RSQ = 0x07, ++ D3DSIO_DP3 = 0x08, ++ D3DSIO_DP4 = 0x09, ++ D3DSIO_MIN = 0x0a, ++ D3DSIO_MAX = 0x0b, ++ D3DSIO_SLT = 0x0c, ++ D3DSIO_SGE = 0x0d, ++ D3DSIO_EXP = 0x0e, ++ D3DSIO_LOG = 0x0f, ++ D3DSIO_LIT = 0x10, ++ D3DSIO_DST = 0x11, ++ D3DSIO_LRP = 0x12, ++ D3DSIO_FRC = 0x13, ++ D3DSIO_M4x4 = 0x14, ++ D3DSIO_M4x3 = 0x15, ++ D3DSIO_M3x4 = 0x16, ++ D3DSIO_M3x3 = 0x17, ++ D3DSIO_M3x2 = 0x18, ++ D3DSIO_CALL = 0x19, ++ D3DSIO_CALLNZ = 0x1a, ++ D3DSIO_LOOP = 0x1b, ++ D3DSIO_RET = 0x1c, ++ D3DSIO_ENDLOOP = 0x1d, ++ D3DSIO_LABEL = 0x1e, ++ D3DSIO_DCL = 0x1f, ++ D3DSIO_POW = 0x20, ++ D3DSIO_CRS = 0x21, ++ D3DSIO_SGN = 0x22, ++ D3DSIO_ABS = 0x23, ++ D3DSIO_NRM = 0x24, ++ D3DSIO_SINCOS = 0x25, ++ D3DSIO_REP = 0x26, ++ D3DSIO_ENDREP = 0x27, ++ D3DSIO_IF = 0x28, ++ D3DSIO_IFC = 0x29, ++ D3DSIO_ELSE = 0x2a, ++ D3DSIO_ENDIF = 0x2b, ++ D3DSIO_BREAK = 0x2c, ++ D3DSIO_BREAKC = 0x2d, ++ D3DSIO_MOVA = 0x2e, ++ D3DSIO_DEFB = 0x2f, ++ D3DSIO_DEFI = 0x30, ++ ++ D3DSIO_TEXCOORD = 0x40, ++ D3DSIO_TEXKILL = 0x41, ++ D3DSIO_TEX = 0x42, ++ D3DSIO_TEXBEM = 0x43, ++ D3DSIO_TEXBEML = 0x44, ++ D3DSIO_TEXREG2AR = 0x45, ++ D3DSIO_TEXREG2GB = 0x46, ++ D3DSIO_TEXM3x2PAD = 0x47, ++ D3DSIO_TEXM3x2TEX = 0x48, ++ D3DSIO_TEXM3x3PAD = 0x49, ++ D3DSIO_TEXM3x3TEX = 0x4a, ++ D3DSIO_TEXM3x3DIFF = 0x4b, ++ D3DSIO_TEXM3x3SPEC = 0x4c, ++ D3DSIO_TEXM3x3VSPEC = 0x4d, ++ D3DSIO_EXPP = 0x4e, ++ D3DSIO_LOGP = 0x4f, ++ D3DSIO_CND = 0x50, ++ D3DSIO_DEF = 0x51, ++ D3DSIO_TEXREG2RGB = 0x52, ++ D3DSIO_TEXDP3TEX = 0x53, ++ D3DSIO_TEXM3x2DEPTH = 0x54, ++ D3DSIO_TEXDP3 = 0x55, ++ D3DSIO_TEXM3x3 = 0x56, ++ D3DSIO_TEXDEPTH = 0x57, ++ D3DSIO_CMP = 0x58, ++ 
D3DSIO_BEM = 0x59, ++ D3DSIO_DP2ADD = 0x5a, ++ D3DSIO_DSX = 0x5b, ++ D3DSIO_DSY = 0x5c, ++ D3DSIO_TEXLDD = 0x5d, ++ D3DSIO_SETP = 0x5e, ++ D3DSIO_TEXLDL = 0x5f, ++ D3DSIO_BREAKP = 0x60, ++ ++ D3DSIO_PHASE = 0xfffd, ++ D3DSIO_COMMENT = 0xfffe, ++ D3DSIO_END = 0xffff, ++ ++ D3DSIO_FORCE_DWORD = 0x7fffffff, ++} D3DSHADER_INSTRUCTION_OPCODE_TYPE; ++ ++typedef enum _D3DSHADER_PARAM_DSTMOD_TYPE ++{ ++ D3DSPDM_NONE = 0 << D3DSP_DSTMOD_SHIFT, ++ D3DSPDM_SATURATE = 1 << D3DSP_DSTMOD_SHIFT, ++ D3DSPDM_PARTIALPRECISION = 2 << D3DSP_DSTMOD_SHIFT, ++ D3DSPDM_MSAMPCENTROID = 4 << D3DSP_DSTMOD_SHIFT, ++ ++ D3DSPDM_FORCE_DWORD = 0x7fffffff, ++} D3DSHADER_PARAM_DSTMOD_TYPE; ++ ++typedef enum _D3DSHADER_PARAM_REGISTER_TYPE ++{ ++ D3DSPR_TEMP = 0x00, ++ D3DSPR_INPUT = 0x01, ++ D3DSPR_CONST = 0x02, ++ D3DSPR_ADDR = 0x03, ++ D3DSPR_TEXTURE = 0x03, ++ D3DSPR_RASTOUT = 0x04, ++ D3DSPR_ATTROUT = 0x05, ++ D3DSPR_TEXCRDOUT = 0x06, ++ D3DSPR_OUTPUT = 0x06, ++ D3DSPR_CONSTINT = 0x07, ++ D3DSPR_COLOROUT = 0x08, ++ D3DSPR_DEPTHOUT = 0x09, ++ D3DSPR_SAMPLER = 0x0a, ++ D3DSPR_CONST2 = 0x0b, ++ D3DSPR_CONST3 = 0x0c, ++ D3DSPR_CONST4 = 0x0d, ++ D3DSPR_CONSTBOOL = 0x0e, ++ D3DSPR_LOOP = 0x0f, ++ D3DSPR_TEMPFLOAT16 = 0x10, ++ D3DSPR_MISCTYPE = 0x11, ++ D3DSPR_LABEL = 0x12, ++ D3DSPR_PREDICATE = 0x13, ++ ++ D3DSPR_FORCE_DWORD = 0x7fffffff, ++} D3DSHADER_PARAM_REGISTER_TYPE; ++ ++typedef enum _D3DSHADER_PARAM_SRCMOD_TYPE ++{ ++ D3DSPSM_NONE = 0x0 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_NEG = 0x1 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_BIAS = 0x2 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_BIASNEG = 0x3 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_SIGN = 0x4 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_SIGNNEG = 0x5 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_COMP = 0x6 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_X2 = 0x7 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_X2NEG = 0x8 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_DZ = 0x9 << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_DW = 0xa << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_ABS = 0xb << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_ABSNEG = 0xc << D3DSP_SRCMOD_SHIFT, ++ D3DSPSM_NOT = 0xd << D3DSP_SRCMOD_SHIFT, ++ ++ D3DSPSM_FORCE_DWORD = 0x7fffffff, ++} D3DSHADER_PARAM_SRCMOD_TYPE; ++ ++typedef enum _D3DSHADER_MISCTYPE_OFFSETS ++{ ++ D3DSMO_POSITION = 0x0, ++ D3DSMO_FACE = 0x1, ++} D3DSHADER_MISCTYPE_OFFSETS; ++ ++typedef enum _D3DVS_RASTOUT_OFFSETS ++{ ++ D3DSRO_POSITION = 0x0, ++ D3DSRO_FOG = 0x1, ++ D3DSRO_POINT_SIZE = 0x2, ++ ++ D3DSRO_FORCE_DWORD = 0x7fffffff, ++} D3DVS_RASTOUT_OFFSETS; ++ ++#endif /* _d3d9TYPES_H_ */ ++#endif /* __VKD3D_D3D9TYPES_H */ +diff --git a/libs/vkd3d/include/vkd3d_d3dcompiler.h b/libs/vkd3d/include/vkd3d_d3dcompiler.h +new file mode 100644 +index 00000000000..c934835dc0a +--- /dev/null ++++ b/libs/vkd3d/include/vkd3d_d3dcompiler.h +@@ -0,0 +1,74 @@ ++/* ++ * Copyright 2010 Matteo Bruni for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#ifndef __VKD3D_D3DCOMPILER_H ++#define __VKD3D_D3DCOMPILER_H ++#ifndef __D3DCOMPILER_H__ ++ ++#define D3DCOMPILE_DEBUG 0x00000001 ++#define D3DCOMPILE_SKIP_VALIDATION 0x00000002 ++#define D3DCOMPILE_SKIP_OPTIMIZATION 0x00000004 ++#define D3DCOMPILE_PACK_MATRIX_ROW_MAJOR 0x00000008 ++#define D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR 0x00000010 ++#define D3DCOMPILE_PARTIAL_PRECISION 0x00000020 ++#define D3DCOMPILE_FORCE_VS_SOFTWARE_NO_OPT 0x00000040 ++#define D3DCOMPILE_FORCE_PS_SOFTWARE_NO_OPT 0x00000080 ++#define D3DCOMPILE_NO_PRESHADER 0x00000100 ++#define D3DCOMPILE_AVOID_FLOW_CONTROL 0x00000200 ++#define D3DCOMPILE_PREFER_FLOW_CONTROL 0x00000400 ++#define D3DCOMPILE_ENABLE_STRICTNESS 0x00000800 ++#define D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY 0x00001000 ++#define D3DCOMPILE_IEEE_STRICTNESS 0x00002000 ++#define D3DCOMPILE_OPTIMIZATION_LEVEL0 0x00004000 ++#define D3DCOMPILE_OPTIMIZATION_LEVEL1 0x00000000 ++#define D3DCOMPILE_OPTIMIZATION_LEVEL2 0x0000c000 ++#define D3DCOMPILE_OPTIMIZATION_LEVEL3 0x00008000 ++#define D3DCOMPILE_RESERVED16 0x00010000 ++#define D3DCOMPILE_RESERVED17 0x00020000 ++#define D3DCOMPILE_WARNINGS_ARE_ERRORS 0x00040000 ++#define D3DCOMPILE_RESOURCES_MAY_ALIAS 0x00080000 ++#define D3DCOMPILE_ENABLE_UNBOUNDED_DESCRIPTOR_TABLES 0x00100000 ++#define D3DCOMPILE_ALL_RESOURCES_BOUND 0x00200000 ++#define D3DCOMPILE_DEBUG_NAME_FOR_SOURCE 0x00400000 ++#define D3DCOMPILE_DEBUG_NAME_FOR_BINARY 0x00800000 ++ ++#define D3DCOMPILE_EFFECT_CHILD_EFFECT 0x00000001 ++#define D3DCOMPILE_EFFECT_ALLOW_SLOW_OPS 0x00000002 ++ ++#define D3DCOMPILE_FLAGS2_FORCE_ROOT_SIGNATURE_LATEST 0x00000000 ++#define D3DCOMPILE_FLAGS2_FORCE_ROOT_SIGNATURE_1_0 0x00000010 ++#define D3DCOMPILE_FLAGS2_FORCE_ROOT_SIGNATURE_1_1 0x00000020 ++ ++#define D3DCOMPILE_SECDATA_MERGE_UAV_SLOTS 0x00000001 ++#define D3DCOMPILE_SECDATA_PRESERVE_TEMPLATE_SLOTS 0x00000002 ++#define D3DCOMPILE_SECDATA_REQUIRE_TEMPLATE_MATCH 0x00000004 ++ ++HRESULT WINAPI D3DCompile(const void *data, SIZE_T data_size, const char *filename, ++ const D3D_SHADER_MACRO *macros, ID3DInclude *include, const char *entrypoint, ++ const char *profile, UINT flags, UINT effect_flags, ID3DBlob **shader, ID3DBlob **error_messages); ++HRESULT WINAPI D3DCompile2(const void *data, SIZE_T data_size, const char *filename, ++ const D3D_SHADER_MACRO *macros, ID3DInclude *include, const char *entrypoint, ++ const char *profile, UINT flags, UINT effect_flags, UINT secondary_flags, ++ const void *secondary_data, SIZE_T secondary_data_size, ID3DBlob **shader, ++ ID3DBlob **error_messages); ++HRESULT WINAPI D3DCreateBlob(SIZE_T size, ID3DBlob **blob); ++HRESULT WINAPI D3DPreprocess(const void *data, SIZE_T size, const char *filename, const D3D_SHADER_MACRO *macros, ++ ID3DInclude *include, ID3DBlob **shader, ID3DBlob **error_messages); ++ ++#endif /* __D3DCOMPILER_H__ */ ++#endif /* __VKD3D_D3DCOMPILER_H */ +diff --git a/libs/vkd3d/include/vkd3d_utils.h b/libs/vkd3d/include/vkd3d_utils.h +new file mode 100644 +index 00000000000..e8462563576 +--- /dev/null ++++ b/libs/vkd3d/include/vkd3d_utils.h +@@ -0,0 +1,108 @@ ++/* ++ * Copyright 2016 Józef Kucia for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free 
Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
++ */
++
++#ifndef __VKD3D_UTILS_H
++#define __VKD3D_UTILS_H
++
++#include <vkd3d.h>
++
++#ifndef VKD3D_UTILS_API_VERSION
++#define VKD3D_UTILS_API_VERSION VKD3D_API_VERSION_1_0
++#endif
++
++#ifdef __cplusplus
++extern "C" {
++#endif /* __cplusplus */
++
++/**
++ * \file vkd3d_utils.h
++ *
++ * This file contains definitions for the vkd3d-utils library.
++ *
++ * The vkd3d-utils library is a collection of routines to ease the
++ * porting of a Direct3D 12 application to vkd3d.
++ *
++ * \since 1.0
++ */
++
++#define VKD3D_WAIT_OBJECT_0 (0)
++#define VKD3D_WAIT_TIMEOUT (1)
++#define VKD3D_WAIT_FAILED (~0u)
++#define VKD3D_INFINITE (~0u)
++
++#ifdef LIBVKD3D_UTILS_SOURCE
++# define VKD3D_UTILS_API VKD3D_EXPORT
++#else
++# define VKD3D_UTILS_API VKD3D_IMPORT
++#endif
++
++/* 1.0 */
++VKD3D_UTILS_API HANDLE vkd3d_create_event(void);
++VKD3D_UTILS_API HRESULT vkd3d_signal_event(HANDLE event);
++VKD3D_UTILS_API unsigned int vkd3d_wait_event(HANDLE event, unsigned int milliseconds);
++VKD3D_UTILS_API void vkd3d_destroy_event(HANDLE event);
++
++#define D3D12CreateDevice(a, b, c, d) D3D12CreateDeviceVKD3D(a, b, c, d, VKD3D_UTILS_API_VERSION)
++VKD3D_UTILS_API HRESULT WINAPI D3D12CreateRootSignatureDeserializer(
++        const void *data, SIZE_T data_size, REFIID iid, void **deserializer);
++VKD3D_UTILS_API HRESULT WINAPI D3D12GetDebugInterface(REFIID iid, void **debug);
++VKD3D_UTILS_API HRESULT WINAPI D3D12SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC *desc,
++        D3D_ROOT_SIGNATURE_VERSION version, ID3DBlob **blob, ID3DBlob **error_blob);
++
++/* 1.2 */
++VKD3D_UTILS_API HRESULT WINAPI D3D12CreateDeviceVKD3D(IUnknown *adapter, D3D_FEATURE_LEVEL feature_level,
++        REFIID iid, void **device, enum vkd3d_api_version api_version);
++VKD3D_UTILS_API HRESULT WINAPI D3D12CreateVersionedRootSignatureDeserializer(const void *data,
++        SIZE_T data_size, REFIID iid, void **deserializer);
++VKD3D_UTILS_API HRESULT WINAPI D3D12SerializeVersionedRootSignature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc,
++        ID3DBlob **blob, ID3DBlob **error_blob);
++
++/* 1.3 */
++VKD3D_UTILS_API HRESULT WINAPI D3DCompile(const void *data, SIZE_T data_size, const char *filename,
++        const D3D_SHADER_MACRO *defines, ID3DInclude *include, const char *entrypoint,
++        const char *target, UINT flags, UINT effect_flags, ID3DBlob **shader, ID3DBlob **error_messages);
++VKD3D_UTILS_API HRESULT WINAPI D3DCompile2(const void *data, SIZE_T data_size, const char *filename,
++        const D3D_SHADER_MACRO *defines, ID3DInclude *include, const char *entrypoint,
++        const char *target, UINT flags, UINT effect_flags, UINT secondary_flags,
++        const void *secondary_data, SIZE_T secondary_data_size, ID3DBlob **shader,
++        ID3DBlob **error_messages);
++VKD3D_UTILS_API HRESULT WINAPI D3DCreateBlob(SIZE_T data_size, ID3DBlob **blob);
++VKD3D_UTILS_API HRESULT WINAPI D3DPreprocess(const void *data, SIZE_T size, const char *filename,
++        const D3D_SHADER_MACRO *defines, ID3DInclude *include,
++        ID3DBlob **shader, ID3DBlob **error_messages);
++
++/**
++ * Set a callback to be called when vkd3d-utils outputs debug logging.
++ *
++ * If NULL, or if this function has not been called, libvkd3d-utils will print
++ * all enabled log output to stderr.
++ *
++ * Calling this function will also set the log callback for libvkd3d and
++ * libvkd3d-shader.
++ *
++ * \param callback Callback function to set.
++ *
++ * \since 1.4
++ */
++VKD3D_UTILS_API void vkd3d_utils_set_log_callback(PFN_vkd3d_log callback);
++
++#ifdef __cplusplus
++}
++#endif /* __cplusplus */
++
++#endif /* __VKD3D_UTILS_H */
+diff --git a/libs/vkd3d/include/vkd3d_windows.h b/libs/vkd3d/include/vkd3d_windows.h
+new file mode 100644
+index 00000000000..002ff667cbc
+--- /dev/null
++++ b/libs/vkd3d/include/vkd3d_windows.h
+@@ -0,0 +1,284 @@
++/*
++ * Copyright 2016 Józef Kucia for CodeWeavers
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with this library; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
++ */
++
++#ifndef __VKD3D_WINDOWS_H
++#define __VKD3D_WINDOWS_H
++#ifndef _INC_WINDOWS
++
++/* Nameless unions */
++#ifndef __C89_NAMELESS
++# ifdef NONAMELESSUNION
++#  define __C89_NAMELESS
++#  define __C89_NAMELESSUNIONNAME u
++# else
++#  define __C89_NAMELESS
++#  define __C89_NAMELESSUNIONNAME
++# endif /* NONAMELESSUNION */
++#endif /* __C89_NAMELESS */
++
++#if !defined(_WIN32) || defined(__WIDL__)
++
++# if !defined(__WIDL__)
++#  if !defined(VKD3D_WIN32_WCHAR)
++#   include <wchar.h>
++#  endif
++#  include <stdint.h>
++# endif
++
++# ifdef __GNUC__
++#  define DECLSPEC_ALIGN(x) __attribute__((aligned(x)))
++# endif
++
++/* HRESULT */
++typedef int HRESULT;
++# define SUCCEEDED(hr) ((HRESULT)(hr) >= 0)
++# define FAILED(hr) ((HRESULT)(hr) < 0)
++
++# define _HRESULT_TYPEDEF_(x) ((HRESULT)x)
++
++# define S_OK _HRESULT_TYPEDEF_(0)
++# define S_FALSE _HRESULT_TYPEDEF_(1)
++
++# define E_NOTIMPL _HRESULT_TYPEDEF_(0x80004001)
++# define E_NOINTERFACE _HRESULT_TYPEDEF_(0x80004002)
++# define E_POINTER _HRESULT_TYPEDEF_(0x80004003)
++# define E_ABORT _HRESULT_TYPEDEF_(0x80004004)
++# define E_FAIL _HRESULT_TYPEDEF_(0x80004005)
++# define E_OUTOFMEMORY _HRESULT_TYPEDEF_(0x8007000E)
++# define E_INVALIDARG _HRESULT_TYPEDEF_(0x80070057)
++
++# define DXGI_ERROR_NOT_FOUND _HRESULT_TYPEDEF_(0x887a0002)
++# define DXGI_ERROR_MORE_DATA _HRESULT_TYPEDEF_(0x887a0003)
++
++# define D3DERR_INVALIDCALL _HRESULT_TYPEDEF_(0x8876086c)
++
++/* Basic types */
++typedef unsigned char BYTE;
++typedef unsigned int DWORD;
++typedef int INT;
++typedef unsigned int UINT;
++typedef int LONG;
++typedef unsigned int ULONG;
++typedef float FLOAT;
++typedef LONG BOOL;
++
++/* Assuming LP64 model */
++typedef char INT8;
++typedef unsigned char UINT8;
++typedef short INT16;
++typedef unsigned short UINT16;
++typedef int INT32;
++typedef unsigned int UINT32;
++# if defined(__WIDL__)
++typedef __int64 INT64;
++typedef unsigned __int64 UINT64;
++# else
++typedef int64_t DECLSPEC_ALIGN(8) INT64;
++typedef uint64_t DECLSPEC_ALIGN(8) UINT64;
++# endif
++typedef INT64 LONG64;
++typedef long LONG_PTR;
++typedef unsigned long ULONG_PTR;
++
++typedef ULONG_PTR SIZE_T;
++
++# ifdef VKD3D_WIN32_WCHAR
++typedef unsigned short WCHAR;
++# else
++typedef wchar_t WCHAR;
++# endif /* VKD3D_WIN32_WCHAR */
++typedef void *HANDLE;
++
++/* GUID */
++# ifdef __WIDL__
++typedef struct
++{
++    unsigned long Data1;
++    unsigned short Data2;
++    unsigned short Data3;
++    unsigned char Data4[8];
++} GUID;
++# else
++typedef struct _GUID
++{
++    unsigned int Data1;
++    unsigned short Data2;
++    unsigned short Data3;
++    unsigned char Data4[8];
++} GUID;
++# endif
++
++typedef GUID IID;
++
++# ifdef INITGUID
++#  ifndef __cplusplus
++#   define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
++        const GUID name DECLSPEC_HIDDEN; \
++        const GUID name = \
++    { l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 }}
++#  else
++#   define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
++        EXTERN_C const GUID name DECLSPEC_HIDDEN; \
++        EXTERN_C const GUID name = \
++    { l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 }}
++#  endif
++# else
++#  define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
++        EXTERN_C const GUID name DECLSPEC_HIDDEN;
++# endif /* INITGUID */
++
++/* __uuidof emulation */
++#if defined(__cplusplus) && !defined(_MSC_VER)
++
++extern "C++"
++{
++    template<typename T> const GUID &__vkd3d_uuidof();
++}
++
++# define __CRT_UUID_DECL(type, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
++    extern "C++" \
++    { \
++        template<> inline const GUID &__vkd3d_uuidof<type>() \
++        { \
++            static const IID __uuid_inst = {l, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}; \
++            return __uuid_inst; \
++        } \
++        template<> inline const GUID &__vkd3d_uuidof<type *>() \
++        { \
++            return __vkd3d_uuidof<type>(); \
++        } \
++    }
++
++# define __uuidof(type) __vkd3d_uuidof<typeof(*(type))>()
++#else
++# define __CRT_UUID_DECL(type, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8)
++#endif /* defined(__cplusplus) && !defined(_MSC_VER) */
++
++typedef struct SECURITY_ATTRIBUTES SECURITY_ATTRIBUTES;
++#endif /* !defined(_WIN32) || defined(__WIDL__) */
++
++
++#ifndef _WIN32
++# include <stddef.h>
++# include <stdlib.h>
++# include <string.h>
++
++# define COM_NO_WINDOWS_H
++
++# define FORCEINLINE inline
++
++# define CONTAINING_RECORD(address, type, field) \
++        ((type *)((char *)(address) - offsetof(type, field)))
++
++# ifdef __x86_64__
++#  define __stdcall __attribute__((ms_abi))
++# else
++#  if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 2)) || defined(__APPLE__)
++#   define __stdcall __attribute__((__stdcall__)) __attribute__((__force_align_arg_pointer__))
++#  else
++#   define __stdcall __attribute__((__stdcall__))
++#  endif
++# endif
++
++# define WINAPI __stdcall
++# define STDMETHODCALLTYPE __stdcall
++
++# ifdef __GNUC__
++#  define DECLSPEC_SELECTANY __attribute__((weak))
++# endif
++
++/* Macros for COM interfaces */
++# define interface struct
++# define BEGIN_INTERFACE
++# define END_INTERFACE
++# define MIDL_INTERFACE(x) struct
++
++# ifdef __cplusplus
++#  define EXTERN_C extern "C"
++# else
++#  define EXTERN_C extern
++# endif
++
++# define CONST_VTBL const
++
++# define TRUE 1
++# define FALSE 0
++
++# if defined(__cplusplus) && !defined(CINTERFACE)
++#  define REFIID const IID &
++#  define REFGUID const GUID &
++# else
++#  define REFIID const IID * const
++#  define REFGUID const GUID * const
++# endif
++
++#if defined(__cplusplus) && !defined(CINTERFACE)
++# define IsEqualGUID(guid1, guid2) (!memcmp(&(guid1), &(guid2), sizeof(GUID)))
++#else
++# define IsEqualGUID(guid1, guid2) (!memcmp(guid1, guid2, sizeof(GUID)))
++#endif
++
++#elif !defined(__WIDL__)
++
++# include <windows.h>
++
++#endif /* _WIN32 */
++
++
++/* Define DECLSPEC_HIDDEN */
++#ifndef DECLSPEC_HIDDEN
++# if defined(__MINGW32__)
++#  define DECLSPEC_HIDDEN
++# elif defined(__GNUC__)
++#  define DECLSPEC_HIDDEN __attribute__((visibility("hidden")))
++# else
++#  define DECLSPEC_HIDDEN
++# endif
++#endif /* DECLSPEC_HIDDEN */
++
++/* Define min() & max() macros */
++#ifndef NOMINMAX
++# ifndef min
++#  define min(a, b) (((a) <= (b)) ? (a) : (b))
++# endif
++
++# ifndef max
++#  define max(a, b) (((a) >= (b)) ? (a) : (b))
++# endif
++#endif /* NOMINMAX */
++
++#ifndef DEFINE_ENUM_FLAG_OPERATORS
++#ifdef __cplusplus
++# define DEFINE_ENUM_FLAG_OPERATORS(type) \
++extern "C++" \
++{ \
++    inline type operator &(type x, type y) { return (type)((int)x & (int)y); } \
++    inline type operator &=(type &x, type y) { return (type &)((int &)x &= (int)y); } \
++    inline type operator ~(type x) { return (type)~(int)x; } \
++    inline type operator |(type x, type y) { return (type)((int)x | (int)y); } \
++    inline type operator |=(type &x, type y) { return (type &)((int &)x |= (int)y); } \
++    inline type operator ^(type x, type y) { return (type)((int)x ^ (int)y); } \
++    inline type operator ^=(type &x, type y) { return (type &)((int &)x ^= (int)y); } \
++}
++#else
++# define DEFINE_ENUM_FLAG_OPERATORS(type)
++#endif
++#endif /* DEFINE_ENUM_FLAG_OPERATORS */
++
++#endif /* _INC_WINDOWS */
++#endif /* __VKD3D_WINDOWS_H */
+diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c
+index 30205088b1b..ce00e536d39 100644
+--- a/libs/vkd3d/libs/vkd3d-common/blob.c
++++ b/libs/vkd3d/libs/vkd3d-common/blob.c
+@@ -17,6 +17,7 @@
+  */
+ 
+ #define COBJMACROS
++
+ #include "vkd3d.h"
+ #include "vkd3d_blob.h"
+ #include "vkd3d_debug.h"
+diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c
+index 499334a35f1..b363efbd360 100644
+--- a/libs/vkd3d/libs/vkd3d-common/debug.c
++++ b/libs/vkd3d/libs/vkd3d-common/debug.c
+@@ -40,9 +40,9 @@
+ #define VKD3D_DEBUG_BUFFER_COUNT 64
+ #define VKD3D_DEBUG_BUFFER_SIZE 512
+ 
+-extern const char *vkd3d_dbg_env_name;
++extern const char *const vkd3d_dbg_env_name;
+ 
+-static const char *debug_level_names[] =
++static const char *const debug_level_names[] =
+ {
+     /* VKD3D_DBG_LEVEL_NONE */ "none",
+     /* VKD3D_DBG_LEVEL_ERR */ "err",
+diff --git a/libs/vkd3d/libs/vkd3d-shader/trace.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
+similarity index 98%
+rename from libs/vkd3d/libs/vkd3d-shader/trace.c
+rename to libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
+index 6cd2dcb270c..3357b4505c0 100644
+--- a/libs/vkd3d/libs/vkd3d-shader/trace.c
++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c
+@@ -109,6 +109,7 @@ static const char * const shader_opcode_names[] =
+     [VKD3DSIH_DEQ ] = "deq",
+     [VKD3DSIH_DFMA ] = "dfma",
+     [VKD3DSIH_DGE ] = "dge",
++    [VKD3DSIH_DISCARD ] = "discard",
+     [VKD3DSIH_DIV ] = "div",
+     [VKD3DSIH_DLT ] = "dlt",
+     [VKD3DSIH_DMAX ] = "dmax",
+@@ -660,8 +661,9 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler,
+     else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV)
+     {
+         if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE)
+-            shader_addline(buffer, "_resource_");
++            shader_addline(buffer, "_resource");
+ 
++        shader_addline(buffer, "_");
+         shader_dump_resource_type(compiler,
semantic->resource_type); + if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS + || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) +@@ -1505,9 +1507,9 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile + { + case VKD3DSIH_BREAKP: + case VKD3DSIH_CONTINUEP: ++ case VKD3DSIH_DISCARD: + case VKD3DSIH_IF: + case VKD3DSIH_RETP: +- case VKD3DSIH_TEXKILL: + switch (ins->flags) + { + case VKD3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(buffer, "_nz"); break; +@@ -1857,11 +1859,11 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + shader_addline(buffer, "\n"); + } + +-enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, +- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out) ++enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vkd3d_shader_instruction_array *instructions, ++ const struct vkd3d_shader_version *shader_version, const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_code *out) + { + enum vkd3d_shader_compile_option_formatting_flags formatting; +- struct vkd3d_shader_version *shader_version; + struct vkd3d_d3d_asm_compiler compiler; + enum vkd3d_result result = VKD3D_OK; + struct vkd3d_string_buffer *buffer; +@@ -1919,16 +1921,16 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, + buffer = &compiler.buffer; + vkd3d_string_buffer_init(buffer); + ++ compiler.shader_version = *shader_version; + shader_version = &compiler.shader_version; +- *shader_version = parser->shader_version; + vkd3d_string_buffer_printf(buffer, "%s%s_%u_%u%s\n", compiler.colours.version, + shader_get_type_prefix(shader_version->type), shader_version->major, + shader_version->minor, compiler.colours.reset); + + indent = 0; +- for (i = 0; i < parser->instructions.count; ++i) ++ for (i = 0; i < instructions->count; ++i) + { +- struct vkd3d_shader_instruction *ins = &parser->instructions.elements[i]; ++ struct vkd3d_shader_instruction *ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { +@@ -1981,12 +1983,13 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, + return result; + } + +-void vkd3d_shader_trace(struct vkd3d_shader_parser *parser) ++void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, ++ const struct vkd3d_shader_version *shader_version) + { + const char *p, *q, *end; + struct vkd3d_shader_code code; + +- if (vkd3d_dxbc_binary_to_text(parser, NULL, &code) != VKD3D_OK) ++ if (vkd3d_dxbc_binary_to_text(instructions, shader_version, NULL, &code) != VKD3D_OK) + return; + + end = (const char *)code.code + code.size; +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index ed81137d225..c35f8ca0ff8 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1,4 +1,6 @@ + /* ++ * d3dbc (Direct3D shader models 1-3 bytecode) support ++ * + * Copyright 2002-2003 Jason Edmeades + * Copyright 2002-2003 Raphael Junqueira + * Copyright 2004 Christian Costa +@@ -6,6 +8,7 @@ + * Copyright 2006 Ivan Gyurdiev + * Copyright 2007-2008 Stefan Dösinger for CodeWeavers + * Copyright 2009, 2021 Henri Verbeet for CodeWeavers ++ * Copyright 2019-2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public +@@ -22,7 +25,7 @@ + * Foundation, Inc., 51 Franklin 
St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +-#include "vkd3d_shader_private.h" ++#include "hlsl.h" + + #define VKD3D_SM1_VS 0xfffeu + #define VKD3D_SM1_PS 0xffffu +@@ -207,7 +210,7 @@ struct vkd3d_sm1_opcode_info + struct vkd3d_shader_sm1_parser + { + const struct vkd3d_sm1_opcode_info *opcode_table; +- const uint32_t *start, *end; ++ const uint32_t *start, *end, *ptr; + bool abort; + + struct vkd3d_shader_parser p; +@@ -462,6 +465,7 @@ static void shader_sm1_parse_src_param(uint32_t param, const struct vkd3d_shader + src->reg.idx[1].rel_addr = NULL; + src->reg.idx[2].offset = ~0u; + src->reg.idx[2].rel_addr = NULL; ++ src->reg.idx_count = 1; + src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); + src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; + } +@@ -480,6 +484,7 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader + dst->reg.idx[1].rel_addr = NULL; + dst->reg.idx[2].offset = ~0u; + dst->reg.idx[2].rel_addr = NULL; ++ dst->reg.idx_count = 1; + dst->write_mask = (param & VKD3D_SM1_WRITEMASK_MASK) >> VKD3D_SM1_WRITEMASK_SHIFT; + dst->modifiers = (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT; + dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; +@@ -661,6 +666,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const + src_param->reg.idx[1].rel_addr = NULL; + src_param->reg.idx[2].offset = ~0u; + src_param->reg.idx[2].rel_addr = NULL; ++ src_param->reg.idx_count = 0; + src_param->reg.immconst_type = type; + memcpy(src_param->reg.u.immconst_uint, *ptr, count * sizeof(uint32_t)); + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; +@@ -671,7 +677,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const + + static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) + { +- const uint32_t **ptr = &sm1->p.ptr; ++ const uint32_t **ptr = &sm1->ptr; + const char *comment; + unsigned int size; + size_t remaining; +@@ -738,13 +744,12 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, + } + } + +-static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *ins) ++static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) + { +- struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); + struct vkd3d_shader_src_param *src_params, *predicate; + const struct vkd3d_sm1_opcode_info *opcode_info; + struct vkd3d_shader_dst_param *dst_param; +- const uint32_t **ptr = &parser->ptr; ++ const uint32_t **ptr = &sm1->ptr; + uint32_t opcode_token; + const uint32_t *p; + bool predicated; +@@ -758,11 +763,11 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru + goto fail; + } + +- ++parser->location.line; ++ ++sm1->p.location.line; + opcode_token = read_u32(ptr); + if (!(opcode_info = shader_sm1_get_opcode_info(sm1, opcode_token & VKD3D_SM1_OPCODE_MASK))) + { +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, ++ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, + "Invalid opcode %#x (token 0x%08x, shader version %u.%u).", + opcode_token & VKD3D_SM1_OPCODE_MASK, opcode_token, + sm1->p.shader_version.major, sm1->p.shader_version.minor); +@@ -775,14 +780,14 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru + ins->raw = false; 
+ ins->structured = false; + predicated = !!(opcode_token & VKD3D_SM1_INSTRUCTION_PREDICATED); +- ins->predicate = predicate = predicated ? shader_parser_get_src_params(parser, 1) : NULL; ++ ins->predicate = predicate = predicated ? shader_parser_get_src_params(&sm1->p, 1) : NULL; + ins->dst_count = opcode_info->dst_count; +- ins->dst = dst_param = shader_parser_get_dst_params(parser, ins->dst_count); ++ ins->dst = dst_param = shader_parser_get_dst_params(&sm1->p, ins->dst_count); + ins->src_count = opcode_info->src_count; +- ins->src = src_params = shader_parser_get_src_params(parser, ins->src_count); ++ ins->src = src_params = shader_parser_get_src_params(&sm1->p, ins->src_count); + if ((!predicate && predicated) || (!src_params && ins->src_count) || (!dst_param && ins->dst_count)) + { +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); ++ vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); + goto fail; + } + +@@ -852,10 +857,9 @@ fail: + *ptr = sm1->end; + } + +-static bool shader_sm1_is_end(struct vkd3d_shader_parser *parser) ++static bool shader_sm1_is_end(struct vkd3d_shader_sm1_parser *sm1) + { +- struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); +- const uint32_t **ptr = &parser->ptr; ++ const uint32_t **ptr = &sm1->ptr; + + shader_sm1_read_comment(sm1); + +@@ -938,7 +942,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, + shader_desc = &sm1->p.shader_desc; + shader_desc->byte_code = code; + shader_desc->byte_code_size = code_size; +- sm1->p.ptr = sm1->start; ++ sm1->ptr = sm1->start; + + return VKD3D_OK; + } +@@ -965,7 +969,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi + } + + instructions = &sm1->p.instructions; +- while (!shader_sm1_is_end(&sm1->p)) ++ while (!shader_sm1_is_end(sm1)) + { + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) + { +@@ -975,7 +979,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ins = &instructions->elements[instructions->count]; +- shader_sm1_read_instruction(&sm1->p, ins); ++ shader_sm1_read_instruction(sm1, ins); + + if (ins->handler_idx == VKD3DSIH_INVALID) + { +@@ -988,5 +992,1084 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi + + *parser = &sm1->p; + +- return VKD3D_OK; ++ return sm1->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; ++} ++ ++bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, ++ bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) ++{ ++ unsigned int i; ++ ++ static const struct ++ { ++ const char *semantic; ++ bool output; ++ enum vkd3d_shader_type shader_type; ++ unsigned int major_version; ++ D3DSHADER_PARAM_REGISTER_TYPE type; ++ DWORD offset; ++ } ++ register_table[] = ++ { ++ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, ++ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, ++ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, ++ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, ++ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, ++ {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, ++ {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, ++ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, ++ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, ++ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, ++ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, ++ }; ++ ++ for (i = 0; i < ARRAY_SIZE(register_table); ++i) ++ { ++ if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) ++ && output == register_table[i].output ++ && ctx->profile->type == register_table[i].shader_type ++ && ctx->profile->major_version == register_table[i].major_version) ++ { ++ *type = register_table[i].type; ++ if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) ++ *reg = register_table[i].offset; ++ else ++ *reg = semantic->index; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) ++{ ++ static const struct ++ { ++ const char *name; ++ D3DDECLUSAGE usage; ++ } ++ semantics[] = ++ { ++ {"binormal", D3DDECLUSAGE_BINORMAL}, ++ {"blendindices", D3DDECLUSAGE_BLENDINDICES}, ++ {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, ++ {"color", D3DDECLUSAGE_COLOR}, ++ {"depth", D3DDECLUSAGE_DEPTH}, ++ {"fog", D3DDECLUSAGE_FOG}, ++ {"normal", D3DDECLUSAGE_NORMAL}, ++ {"position", D3DDECLUSAGE_POSITION}, ++ 
{"positiont", D3DDECLUSAGE_POSITIONT}, ++ {"psize", D3DDECLUSAGE_PSIZE}, ++ {"sample", D3DDECLUSAGE_SAMPLE}, ++ {"sv_depth", D3DDECLUSAGE_DEPTH}, ++ {"sv_position", D3DDECLUSAGE_POSITION}, ++ {"sv_target", D3DDECLUSAGE_COLOR}, ++ {"tangent", D3DDECLUSAGE_TANGENT}, ++ {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, ++ {"texcoord", D3DDECLUSAGE_TEXCOORD}, ++ }; ++ ++ unsigned int i; ++ ++ for (i = 0; i < ARRAY_SIZE(semantics); ++i) ++ { ++ if (!ascii_strcasecmp(semantic->name, semantics[i].name)) ++ { ++ *usage = semantics[i].usage; ++ *usage_idx = semantic->index; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) ++{ ++ if (type == VKD3D_SHADER_TYPE_VERTEX) ++ return D3DVS_VERSION(major, minor); ++ else ++ return D3DPS_VERSION(major, minor); ++} ++ ++static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) ++{ ++ switch (type->class) ++ { ++ case HLSL_CLASS_ARRAY: ++ return sm1_class(type->e.array.type); ++ case HLSL_CLASS_MATRIX: ++ assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); ++ if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) ++ return D3DXPC_MATRIX_COLUMNS; ++ else ++ return D3DXPC_MATRIX_ROWS; ++ case HLSL_CLASS_OBJECT: ++ return D3DXPC_OBJECT; ++ case HLSL_CLASS_SCALAR: ++ return D3DXPC_SCALAR; ++ case HLSL_CLASS_STRUCT: ++ return D3DXPC_STRUCT; ++ case HLSL_CLASS_VECTOR: ++ return D3DXPC_VECTOR; ++ default: ++ ERR("Invalid class %#x.\n", type->class); ++ vkd3d_unreachable(); ++ } ++} ++ ++static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) ++{ ++ switch (type->base_type) ++ { ++ case HLSL_TYPE_BOOL: ++ return D3DXPT_BOOL; ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ return D3DXPT_FLOAT; ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ return D3DXPT_INT; ++ case HLSL_TYPE_PIXELSHADER: ++ return D3DXPT_PIXELSHADER; ++ case HLSL_TYPE_SAMPLER: ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return D3DXPT_SAMPLER1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return D3DXPT_SAMPLER2D; ++ case HLSL_SAMPLER_DIM_3D: ++ return D3DXPT_SAMPLER3D; ++ case HLSL_SAMPLER_DIM_CUBE: ++ return D3DXPT_SAMPLERCUBE; ++ case HLSL_SAMPLER_DIM_GENERIC: ++ return D3DXPT_SAMPLER; ++ default: ++ ERR("Invalid dimension %#x.\n", type->sampler_dim); ++ vkd3d_unreachable(); ++ } ++ break; ++ case HLSL_TYPE_STRING: ++ return D3DXPT_STRING; ++ case HLSL_TYPE_TEXTURE: ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return D3DXPT_TEXTURE1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return D3DXPT_TEXTURE2D; ++ case HLSL_SAMPLER_DIM_3D: ++ return D3DXPT_TEXTURE3D; ++ case HLSL_SAMPLER_DIM_CUBE: ++ return D3DXPT_TEXTURECUBE; ++ case HLSL_SAMPLER_DIM_GENERIC: ++ return D3DXPT_TEXTURE; ++ default: ++ ERR("Invalid dimension %#x.\n", type->sampler_dim); ++ vkd3d_unreachable(); ++ } ++ break; ++ case HLSL_TYPE_VERTEXSHADER: ++ return D3DXPT_VERTEXSHADER; ++ case HLSL_TYPE_VOID: ++ return D3DXPT_VOID; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) ++{ ++ const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); ++ unsigned int array_size = hlsl_get_multiarray_size(type); ++ unsigned int field_count = 0; ++ size_t fields_offset = 0; ++ size_t i; ++ ++ if (type->bytecode_offset) ++ return; ++ ++ if (array_type->class == HLSL_CLASS_STRUCT) ++ { ++ field_count = array_type->e.record.field_count; ++ ++ for (i = 0; i < field_count; ++i) ++ { ++ 
struct hlsl_struct_field *field = &array_type->e.record.fields[i]; ++ ++ field->name_bytecode_offset = put_string(buffer, field->name); ++ write_sm1_type(buffer, field->type, ctab_start); ++ } ++ ++ fields_offset = bytecode_align(buffer) - ctab_start; ++ ++ for (i = 0; i < field_count; ++i) ++ { ++ struct hlsl_struct_field *field = &array_type->e.record.fields[i]; ++ ++ put_u32(buffer, field->name_bytecode_offset - ctab_start); ++ put_u32(buffer, field->type->bytecode_offset - ctab_start); ++ } ++ } ++ ++ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); ++ put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); ++ put_u32(buffer, vkd3d_make_u32(array_size, field_count)); ++ put_u32(buffer, fields_offset); ++} ++ ++static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) ++{ ++ struct hlsl_ir_var *var; ++ ++ list_remove(&to_sort->extern_entry); ++ ++ LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) ++ { ++ if (strcmp(to_sort->name, var->name) < 0) ++ { ++ list_add_before(&var->extern_entry, &to_sort->extern_entry); ++ return; ++ } ++ } ++ ++ list_add_tail(sorted, &to_sort->extern_entry); ++} ++ ++static void sm1_sort_externs(struct hlsl_ctx *ctx) ++{ ++ struct list sorted = LIST_INIT(sorted); ++ struct hlsl_ir_var *var, *next; ++ ++ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_uniform) ++ sm1_sort_extern(&sorted, var); ++ } ++ list_move_tail(&ctx->extern_vars, &sorted); ++} ++ ++static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ struct hlsl_ir_function_decl *entry_func) ++{ ++ size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; ++ unsigned int uniform_count = 0; ++ struct hlsl_ir_var *var; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); ++ ++ if (!var->semantic.name && var->regs[regset].allocated) ++ { ++ ++uniform_count; ++ ++ if (var->is_param && var->is_uniform) ++ { ++ struct vkd3d_string_buffer *name; ++ ++ if (!(name = hlsl_get_string_buffer(ctx))) ++ { ++ buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ vkd3d_string_buffer_printf(name, "$%s", var->name); ++ vkd3d_free((char *)var->name); ++ var->name = hlsl_strdup(ctx, name->buffer); ++ hlsl_release_string_buffer(ctx, name); ++ } ++ } ++ } ++ ++ sm1_sort_externs(ctx); ++ ++ size_offset = put_u32(buffer, 0); ++ ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); ++ ++ ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); ++ creator_offset = put_u32(buffer, 0); ++ put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); ++ put_u32(buffer, uniform_count); ++ put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ ++ put_u32(buffer, 0); /* FIXME: flags */ ++ put_u32(buffer, 0); /* FIXME: target string */ ++ ++ vars_start = bytecode_align(buffer); ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); ++ ++ if (!var->semantic.name && var->regs[regset].allocated) ++ { ++ put_u32(buffer, 0); /* name */ ++ if (regset == HLSL_REGSET_NUMERIC) ++ { ++ put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); ++ put_u32(buffer, var->data_type->reg_size[regset] / 4); ++ } ++ else ++ { ++ 
put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); ++ put_u32(buffer, var->regs[regset].bind_count); ++ } ++ put_u32(buffer, 0); /* type */ ++ put_u32(buffer, 0); /* FIXME: default value */ ++ } ++ } ++ ++ uniform_count = 0; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); ++ ++ if (!var->semantic.name && var->regs[regset].allocated) ++ { ++ size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); ++ size_t name_offset; ++ ++ name_offset = put_string(buffer, var->name); ++ set_u32(buffer, var_offset, name_offset - ctab_start); ++ ++ write_sm1_type(buffer, var->data_type, ctab_start); ++ set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); ++ ++uniform_count; ++ } ++ } ++ ++ offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); ++ set_u32(buffer, creator_offset, offset - ctab_start); ++ ++ ctab_end = bytecode_align(buffer); ++ set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); ++} ++ ++static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) ++{ ++ return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) ++ | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); ++} ++ ++struct sm1_instruction ++{ ++ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; ++ ++ struct sm1_dst_register ++ { ++ D3DSHADER_PARAM_REGISTER_TYPE type; ++ D3DSHADER_PARAM_DSTMOD_TYPE mod; ++ unsigned int writemask; ++ uint32_t reg; ++ } dst; ++ ++ struct sm1_src_register ++ { ++ D3DSHADER_PARAM_REGISTER_TYPE type; ++ D3DSHADER_PARAM_SRCMOD_TYPE mod; ++ unsigned int swizzle; ++ uint32_t reg; ++ } srcs[3]; ++ unsigned int src_count; ++ ++ unsigned int has_dst; ++}; ++ ++static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) ++{ ++ assert(reg->writemask); ++ put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); ++} ++ ++static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, ++ const struct sm1_src_register *reg) ++{ ++ put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); ++} ++ ++static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ const struct sm1_instruction *instr) ++{ ++ uint32_t token = instr->opcode; ++ unsigned int i; ++ ++ if (ctx->profile->major_version > 1) ++ token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; ++ put_u32(buffer, token); ++ ++ if (instr->has_dst) ++ write_sm1_dst_register(buffer, &instr->dst); ++ ++ for (i = 0; i < instr->src_count; ++i) ++ write_sm1_src_register(buffer, &instr->srcs[i]); ++}; ++ ++static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) ++{ ++ src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); ++} ++ ++static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, ++ const struct hlsl_reg *src3) ++{ ++ struct sm1_instruction instr = ++ { ++ .opcode = D3DSIO_DP2ADD, ++ ++ .dst.type = D3DSPR_TEMP, ++ .dst.writemask = dst->writemask, ++ .dst.reg = dst->id, ++ .has_dst = 1, ++ ++ .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), ++ .srcs[0].reg = src1->id, ++ 
.srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), ++ .srcs[1].reg = src2->id, ++ .srcs[2].type = D3DSPR_TEMP, ++ .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), ++ .srcs[2].reg = src3->id, ++ .src_count = 3, ++ }; ++ ++ write_sm1_instruction(ctx, buffer, &instr); ++} ++ ++static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, ++ const struct hlsl_reg *src1, const struct hlsl_reg *src2) ++{ ++ struct sm1_instruction instr = ++ { ++ .opcode = opcode, ++ ++ .dst.type = D3DSPR_TEMP, ++ .dst.writemask = dst->writemask, ++ .dst.reg = dst->id, ++ .has_dst = 1, ++ ++ .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), ++ .srcs[0].reg = src1->id, ++ .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), ++ .srcs[1].reg = src2->id, ++ .src_count = 2, ++ }; ++ ++ sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); ++ sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); ++ write_sm1_instruction(ctx, buffer, &instr); ++} ++ ++static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, ++ const struct hlsl_reg *src1, const struct hlsl_reg *src2) ++{ ++ struct sm1_instruction instr = ++ { ++ .opcode = opcode, ++ ++ .dst.type = D3DSPR_TEMP, ++ .dst.writemask = dst->writemask, ++ .dst.reg = dst->id, ++ .has_dst = 1, ++ ++ .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), ++ .srcs[0].reg = src1->id, ++ .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), ++ .srcs[1].reg = src2->id, ++ .src_count = 2, ++ }; ++ ++ write_sm1_instruction(ctx, buffer, &instr); ++} ++ ++static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, ++ const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) ++{ ++ struct sm1_instruction instr = ++ { ++ .opcode = opcode, ++ ++ .dst.type = D3DSPR_TEMP, ++ .dst.mod = dst_mod, ++ .dst.writemask = dst->writemask, ++ .dst.reg = dst->id, ++ .has_dst = 1, ++ ++ .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), ++ .srcs[0].reg = src->id, ++ .srcs[0].mod = src_mod, ++ .src_count = 1, ++ }; ++ ++ sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); ++ write_sm1_instruction(ctx, buffer, &instr); ++} ++ ++static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++{ ++ unsigned int i, x; ++ ++ for (i = 0; i < ctx->constant_defs.count; ++i) ++ { ++ uint32_t token = D3DSIO_DEF; ++ const struct sm1_dst_register reg = ++ { ++ .type = D3DSPR_CONST, ++ .writemask = VKD3DSP_WRITEMASK_ALL, ++ .reg = i, ++ }; ++ ++ if (ctx->profile->major_version > 1) ++ token |= 5 << D3DSI_INSTLENGTH_SHIFT; ++ put_u32(buffer, token); ++ ++ write_sm1_dst_register(buffer, ®); ++ for (x = 0; x < 4; ++x) ++ put_f32(buffer, ctx->constant_defs.values[i].f[x]); ++ } ++} ++ ++static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ const struct hlsl_ir_var *var, bool output) ++{ ++ struct sm1_dst_register reg = {0}; ++ uint32_t token, usage_idx; ++ D3DDECLUSAGE usage; ++ bool ret; ++ ++ if 
(hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) ++ { ++ usage = 0; ++ usage_idx = 0; ++ } ++ else ++ { ++ ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); ++ assert(ret); ++ reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; ++ reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; ++ } ++ ++ token = D3DSIO_DCL; ++ if (ctx->profile->major_version > 1) ++ token |= 2 << D3DSI_INSTLENGTH_SHIFT; ++ put_u32(buffer, token); ++ ++ token = (1u << 31); ++ token |= usage << D3DSP_DCL_USAGE_SHIFT; ++ token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; ++ put_u32(buffer, token); ++ ++ reg.writemask = (1 << var->data_type->dimx) - 1; ++ write_sm1_dst_register(buffer, ®); ++} ++ ++static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++{ ++ bool write_in = false, write_out = false; ++ struct hlsl_ir_var *var; ++ ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) ++ write_in = true; ++ else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) ++ write_in = write_out = true; ++ else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) ++ write_in = true; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (write_in && var->is_input_semantic) ++ write_sm1_semantic_dcl(ctx, buffer, var, false); ++ if (write_out && var->is_output_semantic) ++ write_sm1_semantic_dcl(ctx, buffer, var, true); ++ } ++} ++ ++static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) ++{ ++ struct sm1_dst_register reg = {0}; ++ uint32_t token, res_type = 0; ++ ++ token = D3DSIO_DCL; ++ if (ctx->profile->major_version > 1) ++ token |= 2 << D3DSI_INSTLENGTH_SHIFT; ++ put_u32(buffer, token); ++ ++ switch (sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ res_type = VKD3D_SM1_RESOURCE_TEXTURE_1D; ++ break; ++ ++ case HLSL_SAMPLER_DIM_2D: ++ res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D; ++ break; ++ ++ case HLSL_SAMPLER_DIM_CUBE: ++ res_type = VKD3D_SM1_RESOURCE_TEXTURE_CUBE; ++ break; ++ ++ case HLSL_SAMPLER_DIM_3D: ++ res_type = VKD3D_SM1_RESOURCE_TEXTURE_3D; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ break; ++ } ++ ++ token = (1u << 31); ++ token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; ++ put_u32(buffer, token); ++ ++ reg.type = D3DSPR_SAMPLER; ++ reg.writemask = VKD3DSP_WRITEMASK_ALL; ++ reg.reg = reg_id; ++ ++ write_sm1_dst_register(buffer, ®); ++} ++ ++static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++{ ++ enum hlsl_sampler_dim sampler_dim; ++ unsigned int i, count, reg_id; ++ struct hlsl_ir_var *var; ++ ++ if (ctx->profile->major_version < 2) ++ return; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) ++ continue; ++ ++ count = var->regs[HLSL_REGSET_SAMPLERS].bind_count; ++ ++ for (i = 0; i < count; ++i) ++ { ++ if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) ++ { ++ sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; ++ assert(sampler_dim != HLSL_SAMPLER_DIM_GENERIC); ++ ++ reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; ++ write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); ++ } ++ } ++ } ++} ++ ++static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ const struct hlsl_ir_node *instr) ++{ ++ 
const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); ++ struct sm1_instruction sm1_instr = ++ { ++ .opcode = D3DSIO_MOV, ++ ++ .dst.type = D3DSPR_TEMP, ++ .dst.reg = instr->reg.id, ++ .dst.writemask = instr->reg.writemask, ++ .has_dst = 1, ++ ++ .srcs[0].type = D3DSPR_CONST, ++ .srcs[0].reg = constant->reg.id, ++ .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), ++ .src_count = 1, ++ }; ++ ++ assert(instr->reg.allocated); ++ assert(constant->reg.allocated); ++ sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); ++ write_sm1_instruction(ctx, buffer, &sm1_instr); ++} ++ ++static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) ++{ ++ struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); ++ struct hlsl_ir_node *arg1 = expr->operands[0].node; ++ unsigned int i; ++ ++ for (i = 0; i < instr->data_type->dimx; ++i) ++ { ++ struct hlsl_reg src = arg1->reg, dst = instr->reg; ++ ++ src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); ++ dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); ++ write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); ++ } ++} ++ ++static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++{ ++ struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); ++ struct hlsl_ir_node *arg1 = expr->operands[0].node; ++ struct hlsl_ir_node *arg2 = expr->operands[1].node; ++ struct hlsl_ir_node *arg3 = expr->operands[2].node; ++ ++ assert(instr->reg.allocated); ++ ++ if (instr->data_type->base_type != HLSL_TYPE_FLOAT) ++ { ++ /* These need to be lowered. */ ++ hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); ++ return; ++ } ++ ++ switch (expr->op) ++ { ++ case HLSL_OP1_ABS: ++ write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); ++ break; ++ ++ case HLSL_OP1_DSX: ++ write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); ++ break; ++ ++ case HLSL_OP1_DSY: ++ write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); ++ break; ++ ++ case HLSL_OP1_EXP2: ++ write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); ++ break; ++ ++ case HLSL_OP1_LOG2: ++ write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); ++ break; ++ ++ case HLSL_OP1_NEG: ++ write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); ++ break; ++ ++ case HLSL_OP1_SAT: ++ write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); ++ break; ++ ++ case HLSL_OP1_RCP: ++ write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); ++ break; ++ ++ case HLSL_OP1_RSQ: ++ write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); ++ break; ++ ++ case HLSL_OP2_ADD: ++ write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); ++ break; ++ ++ case HLSL_OP2_MAX: ++ write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); ++ break; ++ ++ case HLSL_OP2_MIN: ++ write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); ++ break; ++ ++ case HLSL_OP2_MUL: ++ write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); ++ break; ++ ++ case HLSL_OP1_FRACT: ++ write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); ++ break; ++ ++ case HLSL_OP2_DOT: ++ switch (arg1->data_type->dimx) ++ { ++ case 4: ++ 
write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); ++ break; ++ ++ case 3: ++ write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ break; ++ ++ case HLSL_OP3_DP2ADD: ++ write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); ++ break; ++ ++ default: ++ hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); ++ break; ++ } ++} ++ ++static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++{ ++ const struct hlsl_ir_load *load = hlsl_ir_load(instr); ++ const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); ++ struct sm1_instruction sm1_instr = ++ { ++ .opcode = D3DSIO_MOV, ++ ++ .dst.type = D3DSPR_TEMP, ++ .dst.reg = instr->reg.id, ++ .dst.writemask = instr->reg.writemask, ++ .has_dst = 1, ++ ++ .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].reg = reg.id, ++ .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), ++ .src_count = 1, ++ }; ++ ++ assert(instr->reg.allocated); ++ ++ if (load->src.var->is_uniform) ++ { ++ assert(reg.allocated); ++ sm1_instr.srcs[0].type = D3DSPR_CONST; ++ } ++ else if (load->src.var->is_input_semantic) ++ { ++ if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, ++ false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) ++ { ++ assert(reg.allocated); ++ sm1_instr.srcs[0].type = D3DSPR_INPUT; ++ sm1_instr.srcs[0].reg = reg.id; ++ } ++ else ++ sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); ++ } ++ ++ sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); ++ write_sm1_instruction(ctx, buffer, &sm1_instr); ++} ++ ++static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ const struct hlsl_ir_node *instr) ++{ ++ const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); ++ struct hlsl_ir_node *coords = load->coords.node; ++ unsigned int sampler_offset, reg_id; ++ struct sm1_instruction sm1_instr; ++ ++ sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); ++ reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; ++ ++ sm1_instr = (struct sm1_instruction) ++ { ++ .opcode = D3DSIO_TEX, ++ ++ .dst.type = D3DSPR_TEMP, ++ .dst.reg = instr->reg.id, ++ .dst.writemask = instr->reg.writemask, ++ .has_dst = 1, ++ ++ .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].reg = coords->reg.id, ++ .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), ++ ++ .srcs[1].type = D3DSPR_SAMPLER, ++ .srcs[1].reg = reg_id, ++ .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), ++ ++ .src_count = 2, ++ }; ++ ++ assert(instr->reg.allocated); ++ ++ write_sm1_instruction(ctx, buffer, &sm1_instr); ++} ++ ++static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ const struct hlsl_ir_node *instr) ++{ ++ const struct hlsl_ir_store *store = hlsl_ir_store(instr); ++ const struct hlsl_ir_node *rhs = store->rhs.node; ++ const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); ++ struct sm1_instruction sm1_instr = ++ { ++ .opcode = D3DSIO_MOV, ++ ++ .dst.type = D3DSPR_TEMP, ++ .dst.reg = reg.id, ++ .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), ++ .has_dst = 1, ++ ++ .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].reg = rhs->reg.id, ++ .srcs[0].swizzle = 
hlsl_swizzle_from_writemask(rhs->reg.writemask), ++ .src_count = 1, ++ }; ++ ++ if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) ++ { ++ FIXME("Matrix writemasks need to be lowered.\n"); ++ return; ++ } ++ ++ if (store->lhs.var->is_output_semantic) ++ { ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) ++ { ++ sm1_instr.dst.type = D3DSPR_TEMP; ++ sm1_instr.dst.reg = 0; ++ } ++ else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, ++ true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) ++ { ++ assert(reg.allocated); ++ sm1_instr.dst.type = D3DSPR_OUTPUT; ++ sm1_instr.dst.reg = reg.id; ++ } ++ else ++ sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; ++ } ++ else ++ assert(reg.allocated); ++ ++ sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); ++ write_sm1_instruction(ctx, buffer, &sm1_instr); ++} ++ ++static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ const struct hlsl_ir_node *instr) ++{ ++ const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); ++ const struct hlsl_ir_node *val = swizzle->val.node; ++ struct sm1_instruction sm1_instr = ++ { ++ .opcode = D3DSIO_MOV, ++ ++ .dst.type = D3DSPR_TEMP, ++ .dst.reg = instr->reg.id, ++ .dst.writemask = instr->reg.writemask, ++ .has_dst = 1, ++ ++ .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].reg = val->reg.id, ++ .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), ++ swizzle->swizzle, instr->data_type->dimx), ++ .src_count = 1, ++ }; ++ ++ assert(instr->reg.allocated); ++ assert(val->reg.allocated); ++ sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); ++ write_sm1_instruction(ctx, buffer, &sm1_instr); ++} ++ ++static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ const struct hlsl_ir_function_decl *entry_func) ++{ ++ const struct hlsl_ir_node *instr; ++ ++ LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) ++ { ++ if (instr->data_type) ++ { ++ if (instr->data_type->class == HLSL_CLASS_MATRIX) ++ { ++ /* These need to be lowered. 
*/ ++ hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); ++ continue; ++ } ++ else if (instr->data_type->class == HLSL_CLASS_OBJECT) ++ { ++ hlsl_fixme(ctx, &instr->loc, "Object copy."); ++ break; ++ } ++ ++ assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); ++ } ++ ++ switch (instr->type) ++ { ++ case HLSL_IR_CALL: ++ vkd3d_unreachable(); ++ ++ case HLSL_IR_CONSTANT: ++ write_sm1_constant(ctx, buffer, instr); ++ break; ++ ++ case HLSL_IR_EXPR: ++ write_sm1_expr(ctx, buffer, instr); ++ break; ++ ++ case HLSL_IR_LOAD: ++ write_sm1_load(ctx, buffer, instr); ++ break; ++ ++ case HLSL_IR_RESOURCE_LOAD: ++ write_sm1_resource_load(ctx, buffer, instr); ++ break; ++ ++ case HLSL_IR_STORE: ++ write_sm1_store(ctx, buffer, instr); ++ break; ++ ++ case HLSL_IR_SWIZZLE: ++ write_sm1_swizzle(ctx, buffer, instr); ++ break; ++ ++ default: ++ hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); ++ } ++ } ++} ++ ++int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) ++{ ++ struct vkd3d_bytecode_buffer buffer = {0}; ++ int ret; ++ ++ put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); ++ ++ write_sm1_uniforms(ctx, &buffer, entry_func); ++ ++ write_sm1_constant_defs(ctx, &buffer); ++ write_sm1_semantic_dcls(ctx, &buffer); ++ write_sm1_sampler_dcls(ctx, &buffer); ++ write_sm1_instructions(ctx, &buffer, entry_func); ++ ++ put_u32(&buffer, D3DSIO_END); ++ ++ if (!(ret = buffer.status)) ++ { ++ out->code = buffer.data; ++ out->size = buffer.size; ++ } ++ return ret; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +index d99ea2e36b6..3e3f06faeb5 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +@@ -19,1680 +19,74 @@ + */ + + #include "vkd3d_shader_private.h" +-#include "sm4.h" + +-#define SM4_MAX_SRC_COUNT 6 +-#define SM4_MAX_DST_COUNT 2 +- +-STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); +- +-void dxbc_writer_init(struct dxbc_writer *dxbc) +-{ +- memset(dxbc, 0, sizeof(*dxbc)); +-} +- +-void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) +-{ +- struct vkd3d_shader_dxbc_section_desc *section; +- +- assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); +- +- section = &dxbc->sections[dxbc->section_count++]; +- section->tag = tag; +- section->data.code = data; +- section->data.size = size; +-} +- +-int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, +- struct vkd3d_shader_code *dxbc, char **messages) +-{ +- size_t size_position, offsets_position, checksum_position, i; +- struct vkd3d_bytecode_buffer buffer = {0}; +- uint32_t checksum[4]; +- +- TRACE("section_count %zu, sections %p, dxbc %p, messages %p.\n", section_count, sections, dxbc, messages); +- +- if (messages) +- *messages = NULL; +- +- put_u32(&buffer, TAG_DXBC); +- +- checksum_position = bytecode_get_size(&buffer); +- for (i = 0; i < 4; ++i) +- put_u32(&buffer, 0); +- +- put_u32(&buffer, 1); /* version */ +- size_position = put_u32(&buffer, 0); +- put_u32(&buffer, section_count); +- +- offsets_position = bytecode_get_size(&buffer); +- for (i = 0; i < section_count; ++i) +- put_u32(&buffer, 0); +- +- for (i = 0; i < section_count; ++i) +- { +- set_u32(&buffer, offsets_position + i * sizeof(uint32_t), bytecode_get_size(&buffer)); +- 
put_u32(&buffer, sections[i].tag); +- put_u32(&buffer, sections[i].data.size); +- bytecode_put_bytes(&buffer, sections[i].data.code, sections[i].data.size); +- } +- set_u32(&buffer, size_position, bytecode_get_size(&buffer)); +- +- vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); +- for (i = 0; i < 4; ++i) +- set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); +- +- if (!buffer.status) +- { +- dxbc->code = buffer.data; +- dxbc->size = buffer.size; +- } +- return buffer.status; +-} +- +-int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) +-{ +- return vkd3d_shader_serialize_dxbc(dxbc->section_count, dxbc->sections, out, NULL); +-} +- +-struct vkd3d_shader_src_param_entry +-{ +- struct list entry; +- struct vkd3d_shader_src_param param; +-}; +- +-struct vkd3d_shader_sm4_parser +-{ +- const uint32_t *start, *end; +- +- unsigned int output_map[MAX_REG_OUTPUT]; +- +- struct vkd3d_shader_parser p; +-}; +- +-struct vkd3d_sm4_opcode_info +-{ +- enum vkd3d_sm4_opcode opcode; +- enum vkd3d_shader_opcode handler_idx; +- char dst_info[SM4_MAX_DST_COUNT]; +- char src_info[SM4_MAX_SRC_COUNT]; +- void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, +- const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); +-}; +- +-static const enum vkd3d_primitive_type output_primitive_type_table[] = +-{ +- /* UNKNOWN */ VKD3D_PT_UNDEFINED, +- /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, +- /* UNKNOWN */ VKD3D_PT_UNDEFINED, +- /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, +- /* UNKNOWN */ VKD3D_PT_UNDEFINED, +- /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ VKD3D_PT_TRIANGLESTRIP, +-}; +- +-static const enum vkd3d_primitive_type input_primitive_type_table[] = +-{ +- /* UNKNOWN */ VKD3D_PT_UNDEFINED, +- /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, +- /* VKD3D_SM4_INPUT_PT_LINE */ VKD3D_PT_LINELIST, +- /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, +- /* UNKNOWN */ VKD3D_PT_UNDEFINED, +- /* UNKNOWN */ VKD3D_PT_UNDEFINED, +- /* VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, +- /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, +-}; +- +-static const enum vkd3d_shader_resource_type resource_type_table[] = +-{ +- /* 0 */ VKD3D_SHADER_RESOURCE_NONE, +- /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, +- /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, +- /* VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, +- /* VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, +- /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, +- /* VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, +- /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, +- /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, +- /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, +- /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, +- /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, +- /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, +-}; +- +-static const enum vkd3d_data_type data_type_table[] = +-{ +- /* 0 */ VKD3D_DATA_FLOAT, +- /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, +- /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, +- /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, +- /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, 
+- /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, +- /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, +- /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, +- /* VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, +- /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, +-}; +- +-static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) +-{ +- return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); +-} +- +-static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) +-{ +- const struct vkd3d_shader_version *version = &sm4->p.shader_version; +- +- return version->major >= 5 && version->minor >= 1; +-} +- +-static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, +- const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); +-static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, +- const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param); +- +-static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, +- const uint32_t **ptr, const uint32_t *end, unsigned int *register_space) +-{ +- *register_space = 0; +- +- if (!shader_is_sm_5_1(priv)) +- return true; +- +- if (*ptr >= end) +- { +- WARN("Invalid ptr %p >= end %p.\n", *ptr, end); +- return false; +- } +- +- *register_space = *(*ptr)++; +- return true; +-} +- +-static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, +- (struct vkd3d_shader_src_param *)&ins->src[0]); +- ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? 
+- VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; +-} +- +-static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, +- const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- struct vkd3d_shader_immediate_constant_buffer *icb; +- enum vkd3d_sm4_shader_data_type type; +- unsigned int icb_size; +- +- type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; +- if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) +- { +- FIXME("Ignoring shader data type %#x.\n", type); +- ins->handler_idx = VKD3DSIH_NOP; +- return; +- } +- +- ++tokens; +- icb_size = token_count - 1; +- if (icb_size % 4) +- { +- FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); +- ins->handler_idx = VKD3DSIH_INVALID; +- return; +- } +- +- if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) +- { +- ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); +- vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); +- ins->handler_idx = VKD3DSIH_INVALID; +- return; +- } +- icb->vec4_count = icb_size / 4; +- memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); +- shader_instruction_array_add_icb(&priv->p.instructions, icb); +- ins->declaration.icb = icb; +-} +- +-static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, +- const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) +-{ +- range->first = reg->idx[1].offset; +- range->last = reg->idx[shader_is_sm_5_1(sm4) ? 2 : 1].offset; +- if (range->last < range->first) +- { +- FIXME("Invalid register range [%u:%u].\n", range->first, range->last); +- vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, +- "Last register %u must not be less than first register %u in range.\n", range->last, range->first); +- } +-} +- +-static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; +- enum vkd3d_sm4_resource_type resource_type; +- const uint32_t *end = &tokens[token_count]; +- enum vkd3d_sm4_data_type data_type; +- enum vkd3d_data_type reg_data_type; +- DWORD components; +- unsigned int i; +- +- resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; +- if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) +- { +- FIXME("Unhandled resource type %#x.\n", resource_type); +- semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; +- } +- else +- { +- semantic->resource_type = resource_type_table[resource_type]; +- } +- +- if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS +- || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) +- { +- semantic->sample_count = (opcode_token & VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK) >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; +- } +- +- reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? 
VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; +- shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); +- shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); +- +- components = *tokens++; +- for (i = 0; i < VKD3D_VEC4_SIZE; i++) +- { +- data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); +- +- if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) +- { +- FIXME("Unhandled data type %#x.\n", data_type); +- semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; +- } +- else +- { +- semantic->resource_data_type[i] = data_type_table[data_type]; +- } +- } +- +- if (reg_data_type == VKD3D_DATA_UAV) +- ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; +- +- shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); +-} +- +-static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- const uint32_t *end = &tokens[token_count]; +- +- shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); +- shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); +- if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) +- ins->flags |= VKD3DSI_INDEXED_DYNAMIC; +- +- ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; +- ins->declaration.cb.range.space = 0; +- +- if (shader_is_sm_5_1(priv)) +- { +- if (tokens >= end) +- { +- FIXME("Invalid ptr %p >= end %p.\n", tokens, end); +- return; +- } +- +- ins->declaration.cb.size = *tokens++; +- shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); +- } +-} +- +-static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, +- const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- const uint32_t *end = &tokens[token_count]; +- +- ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; +- if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) +- FIXME("Unhandled sampler mode %#x.\n", ins->flags); +- shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); +- shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); +- shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); +-} +- +-static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, +- &ins->declaration.index_range.dst); +- ins->declaration.index_range.register_count = *tokens; +-} +- +-static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- enum vkd3d_sm4_output_primitive_type primitive_type; +- +- primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; +- if (primitive_type >= ARRAY_SIZE(output_primitive_type_table)) +- ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; +- else +- 
ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type]; +- +- if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) +- FIXME("Unhandled output primitive type %#x.\n", primitive_type); +-} +- +-static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- enum vkd3d_sm4_input_primitive_type primitive_type; +- +- primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; +- if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) +- { +- ins->declaration.primitive_type.type = VKD3D_PT_PATCH; +- ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; +- } +- else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) +- { +- ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; +- } +- else +- { +- ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type]; +- } +- +- if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) +- FIXME("Unhandled input primitive type %#x.\n", primitive_type); +-} +- +-static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->declaration.count = *tokens; +-} +- +-static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); +-} +- +-static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, +- &ins->declaration.register_semantic.reg); +- ins->declaration.register_semantic.sysval_semantic = *tokens; +-} +- +-static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; +- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); +-} +- +-static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; +- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, +- &ins->declaration.register_semantic.reg); +- ins->declaration.register_semantic.sysval_semantic = *tokens; +-} +- +-static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->declaration.indexable_temp.register_idx = *tokens++; 
+- ins->declaration.indexable_temp.register_size = *tokens++; +- ins->declaration.indexable_temp.component_count = *tokens; +-} +- +-static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; +-} +- +-static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, +- const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- struct vkd3d_shader_src_param *src_params = (struct vkd3d_shader_src_param *)ins->src; +- src_params[0].reg.u.fp_body_idx = *tokens++; +- shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, &src_params[0]); +-} +- +-static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->declaration.index = *tokens; +-} +- +-static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->declaration.index = *tokens++; +- FIXME("Ignoring set of function bodies (count %u).\n", *tokens); +-} +- +-static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->declaration.fp.index = *tokens++; +- ins->declaration.fp.body_count = *tokens++; +- ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT; +- ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK; +- FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count); +-} +- +-static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) +- >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; +-} +- +-static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) +- >> VKD3D_SM5_TESSELLATOR_SHIFT; +-} +- +-static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) +- >> VKD3D_SM5_TESSELLATOR_SHIFT; +-} +- +-static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) +- >> VKD3D_SM5_TESSELLATOR_SHIFT; +-} +- +-static void 
shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->declaration.max_tessellation_factor = *(float *)tokens; +-} +- +-static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->declaration.thread_group_size.x = *tokens++; +- ins->declaration.thread_group_size.y = *tokens++; +- ins->declaration.thread_group_size.z = *tokens++; +-} +- +-static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, +- const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; +- const uint32_t *end = &tokens[token_count]; +- +- shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); +- shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); +- ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; +- shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +-} +- +-static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; +- const uint32_t *end = &tokens[token_count]; +- +- shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); +- shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); +- ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; +- resource->byte_stride = *tokens++; +- if (resource->byte_stride % 4) +- FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); +- shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +-} +- +-static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg); +- ins->declaration.tgsm_raw.byte_count = *tokens; +- if (ins->declaration.tgsm_raw.byte_count % 4) +- FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); +-} +- +-static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, +- &ins->declaration.tgsm_structured.reg); +- ins->declaration.tgsm_structured.byte_stride = *tokens++; +- ins->declaration.tgsm_structured.structure_count = *tokens; +- if (ins->declaration.tgsm_structured.byte_stride % 4) +- FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); +-} +- +-static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t 
opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; +- const uint32_t *end = &tokens[token_count]; +- +- shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); +- shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); +- resource->byte_stride = *tokens++; +- if (resource->byte_stride % 4) +- FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); +- shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +-} +- +-static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; +- const uint32_t *end = &tokens[token_count]; +- +- shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); +- shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); +- shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); +-} +- +-static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, +- const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +-{ +- ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT; +-} +- +-/* +- * d -> VKD3D_DATA_DOUBLE +- * f -> VKD3D_DATA_FLOAT +- * i -> VKD3D_DATA_INT +- * u -> VKD3D_DATA_UINT +- * O -> VKD3D_DATA_OPAQUE +- * R -> VKD3D_DATA_RESOURCE +- * S -> VKD3D_DATA_SAMPLER +- * U -> VKD3D_DATA_UAV +- */ +-static const struct vkd3d_sm4_opcode_info opcode_table[] = +-{ +- {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"}, +- {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, +- {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, +- {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", +- shader_sm4_read_conditional_op}, +- {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, +- {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, +- {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", +- shader_sm4_read_conditional_op}, +- {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, +- {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, +- {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, +- {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, +- {VKD3D_SM4_OP_DISCARD, VKD3DSIH_TEXKILL, "", "u", +- shader_sm4_read_conditional_op}, +- {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, +- {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, +- {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, +- {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"}, +- {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""}, +- {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""}, +- {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""}, +- {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""}, +- {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""}, +- {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"}, +- {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"}, +- {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"}, +- {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"}, +- {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"}, +- {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", "ff"}, +- {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, +- {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", +- shader_sm4_read_conditional_op}, +- 
{VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, +- {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, +- {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, +- {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"}, +- {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"}, +- {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN, "i", "ii"}, +- {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, +- {VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, +- {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, +- {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, +- {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, +- {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, +- {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, +- {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, +- {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, +- {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, +- {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, +- {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, +- {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, +- {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, +- {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, +- {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", +- shader_sm4_read_shader_data}, +- {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, +- {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, +- {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, +- {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, +- {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, +- {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, +- {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, +- {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, +- {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, +- {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", +- shader_sm4_read_conditional_op}, +- {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, +- {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, +- {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, +- {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, +- {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, +- {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, +- {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, +- {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, +- {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, +- {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, +- {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, +- {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, +- {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, +- {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, +- {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, +- {VKD3D_SM4_OP_ULT, VKD3DSIH_ULT, "u", "uu"}, +- {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, +- {VKD3D_SM4_OP_UMUL, VKD3DSIH_UMUL, "uu", "uu"}, +- {VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, +- {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, +- {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, +- {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, +- {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, +- {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "", "", +- shader_sm4_read_dcl_resource}, +- {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", +- shader_sm4_read_dcl_constant_buffer}, +- {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", +- shader_sm4_read_dcl_sampler}, +- {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", +- shader_sm4_read_dcl_index_range}, +- {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", +- shader_sm4_read_dcl_output_topology}, +- {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", +- shader_sm4_read_dcl_input_primitive}, +- 
{VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", +- shader_sm4_read_declaration_count}, +- {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", +- shader_sm4_read_declaration_dst}, +- {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", +- shader_sm4_read_declaration_register_semantic}, +- {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", +- shader_sm4_read_declaration_register_semantic}, +- {VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", +- shader_sm4_read_dcl_input_ps}, +- {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", +- shader_sm4_read_declaration_register_semantic}, +- {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, VKD3DSIH_DCL_INPUT_PS_SIV, "", "", +- shader_sm4_read_dcl_input_ps_siv}, +- {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", +- shader_sm4_read_declaration_dst}, +- {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", +- shader_sm4_read_declaration_register_semantic}, +- {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", +- shader_sm4_read_declaration_count}, +- {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", +- shader_sm4_read_dcl_indexable_temp}, +- {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", +- shader_sm4_read_dcl_global_flags}, +- {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, +- {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, +- {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, +- {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, +- {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, +- {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, +- {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, +- {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, +- {VKD3D_SM5_OP_EMIT_STREAM, VKD3DSIH_EMIT_STREAM, "", "f"}, +- {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, +- {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", +- shader_sm5_read_fcall}, +- {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, +- {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, +- {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, +- {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, +- {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, +- {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, +- {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, +- {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, +- {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, +- {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, +- {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, +- {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, +- {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, +- {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, +- {VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, +- {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, +- {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, +- {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, +- {VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, +- {VKD3D_SM5_OP_SWAPC, VKD3DSIH_SWAPC, "ff", "uff"}, +- {VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, +- {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", +- shader_sm5_read_dcl_function_body}, +- {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", +- shader_sm5_read_dcl_function_table}, +- {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", +- shader_sm5_read_dcl_interface}, +- 
{VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", +- shader_sm5_read_control_point_count}, +- {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", +- shader_sm5_read_control_point_count}, +- {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", +- shader_sm5_read_dcl_tessellator_domain}, +- {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", +- shader_sm5_read_dcl_tessellator_partitioning}, +- {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", +- shader_sm5_read_dcl_tessellator_output_primitive}, +- {VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", +- shader_sm5_read_dcl_hs_max_tessfactor}, +- {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", +- shader_sm4_read_declaration_count}, +- {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", +- shader_sm4_read_declaration_count}, +- {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", +- shader_sm5_read_dcl_thread_group}, +- {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", +- shader_sm4_read_dcl_resource}, +- {VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", +- shader_sm5_read_dcl_uav_raw}, +- {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", +- shader_sm5_read_dcl_uav_structured}, +- {VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", +- shader_sm5_read_dcl_tgsm_raw}, +- {VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", +- shader_sm5_read_dcl_tgsm_structured}, +- {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", +- shader_sm5_read_dcl_resource_raw}, +- {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", +- shader_sm5_read_dcl_resource_structured}, +- {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, +- {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, +- {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, +- {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, +- {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, +- {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, +- {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, +- {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, +- {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, +- {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, +- {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, +- {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, +- {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, +- {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, +- {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, +- {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, +- {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, +- {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, +- {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, +- {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, +- {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, +- {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, +- {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, +- 
{VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, +- {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, +- {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, +- {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, +- {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", +- shader_sm5_read_sync}, +- {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, +- {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, +- {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, +- {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, +- {VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, +- {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, +- {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, +- {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, +- {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, +- {VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, +- {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, +- {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, +- {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, +- {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, +- {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3DSIH_DCL_GS_INSTANCES, "", "", +- shader_sm4_read_declaration_count}, +- {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, +- {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, +- {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, +- {VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"}, +- {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, +- {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, +- {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, +- {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, +- {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, +- {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, +- {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, +- {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, +- {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, +- {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, +- {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, +- {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, +- {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, +- {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, +- {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, +- {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, +- {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, +- {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, +- {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, +- {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, +-}; +- +-static const enum vkd3d_shader_register_type register_type_table[] = +-{ +- /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, +- /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, +- /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, +- /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, +- /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, +- /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, +- /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, +- /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, +- /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, +- /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, +- /* UNKNOWN */ ~0u, +- /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, +- /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, +- /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, +- /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, +- /* VKD3D_SM4_RT_OMASK */ 
VKD3DSPR_SAMPLEMASK, +- /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, +- /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, +- /* UNKNOWN */ ~0u, +- /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, +- /* UNKNOWN */ ~0u, +- /* UNKNOWN */ ~0u, +- /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, +- /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, +- /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, +- /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, +- /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, +- /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, +- /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, +- /* UNKNOWN */ ~0u, +- /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, +- /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, +- /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, +- /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, +- /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, +- /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, +- /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, +- /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, +- /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, +- /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, +- /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, +- /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, +-}; +- +-static const enum vkd3d_shader_register_precision register_precision_table[] = +-{ +- /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, +- /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, +- /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, +- /* UNKNOWN */ VKD3D_SHADER_REGISTER_PRECISION_INVALID, +- /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, +- /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, +-}; +- +-static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) +-{ +- unsigned int i; +- +- for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) +- { +- if (opcode == opcode_table[i].opcode) return &opcode_table[i]; +- } +- +- return NULL; +-} +- +-static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) +-{ +- switch (sm4->p.shader_version.type) +- { +- case VKD3D_SHADER_TYPE_PIXEL: +- if (reg->type == VKD3DSPR_OUTPUT) +- { +- unsigned int reg_idx = reg->idx[0].offset; +- +- if (reg_idx >= ARRAY_SIZE(sm4->output_map)) +- { +- ERR("Invalid output index %u.\n", reg_idx); +- break; +- } +- +- reg->type = VKD3DSPR_COLOROUT; +- reg->idx[0].offset = sm4->output_map[reg_idx]; +- } +- break; +- +- default: +- break; +- } +-} +- +-static enum vkd3d_data_type map_data_type(char t) +-{ +- switch (t) +- { +- case 'd': +- return VKD3D_DATA_DOUBLE; +- case 'f': +- return VKD3D_DATA_FLOAT; +- case 'i': +- return VKD3D_DATA_INT; +- case 'u': +- return VKD3D_DATA_UINT; +- case 'O': +- return VKD3D_DATA_OPAQUE; +- case 'R': +- return VKD3D_DATA_RESOURCE; +- case 'S': +- return VKD3D_DATA_SAMPLER; +- case 'U': +- return VKD3D_DATA_UAV; +- default: +- ERR("Invalid data type '%c'.\n", t); +- return VKD3D_DATA_FLOAT; +- } +-} +- +-static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) +-{ +- struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); +- +- 
shader_instruction_array_destroy(&parser->instructions); +- free_shader_desc(&parser->shader_desc); +- vkd3d_free(sm4); +-} +- +-static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, +- const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) +-{ +- if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) +- { +- struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&priv->p, 1); +- +- if (!(reg_idx->rel_addr = rel_addr)) +- { +- ERR("Failed to get src param for relative addressing.\n"); +- return false; +- } +- +- if (addressing & VKD3D_SM4_ADDRESSING_OFFSET) +- reg_idx->offset = *(*ptr)++; +- else +- reg_idx->offset = 0; +- shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr); +- } +- else +- { +- reg_idx->rel_addr = NULL; +- reg_idx->offset = *(*ptr)++; +- } +- +- return true; +-} +- +-static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_type) +-{ +- switch (register_type) +- { +- case VKD3D_SM4_RT_SAMPLER: +- case VKD3D_SM4_RT_RESOURCE: +- case VKD3D_SM4_RT_CONSTBUFFER: +- case VKD3D_SM5_RT_UAV: +- return true; +- +- default: +- return false; +- } +-} +- +-static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, +- enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) +-{ +- enum vkd3d_sm4_register_precision precision; +- enum vkd3d_sm4_register_type register_type; +- enum vkd3d_sm4_extended_operand_type type; +- enum vkd3d_sm4_register_modifier m; +- uint32_t token, order, extended; +- +- if (*ptr >= end) +- { +- WARN("Invalid ptr %p >= end %p.\n", *ptr, end); +- return false; +- } +- token = *(*ptr)++; +- +- register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; +- if (register_type >= ARRAY_SIZE(register_type_table) +- || register_type_table[register_type] == VKD3DSPR_INVALID) +- { +- FIXME("Unhandled register type %#x.\n", register_type); +- param->type = VKD3DSPR_TEMP; +- } +- else +- { +- param->type = register_type_table[register_type]; +- } +- param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; +- param->non_uniform = false; +- param->data_type = data_type; +- +- *modifier = VKD3DSPSM_NONE; +- if (token & VKD3D_SM4_EXTENDED_OPERAND) +- { +- if (*ptr >= end) +- { +- WARN("Invalid ptr %p >= end %p.\n", *ptr, end); +- return false; +- } +- extended = *(*ptr)++; +- +- if (extended & VKD3D_SM4_EXTENDED_OPERAND) +- { +- FIXME("Skipping second-order extended operand.\n"); +- *ptr += *ptr < end; +- } +- +- type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK; +- if (type == VKD3D_SM4_EXTENDED_OPERAND_MODIFIER) +- { +- m = (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT; +- switch (m) +- { +- case VKD3D_SM4_REGISTER_MODIFIER_NEGATE: +- *modifier = VKD3DSPSM_NEG; +- break; +- +- case VKD3D_SM4_REGISTER_MODIFIER_ABS: +- *modifier = VKD3DSPSM_ABS; +- break; +- +- case VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE: +- *modifier = VKD3DSPSM_ABSNEG; +- break; +- +- default: +- FIXME("Unhandled register modifier %#x.\n", m); +- /* fall-through */ +- case VKD3D_SM4_REGISTER_MODIFIER_NONE: +- break; +- } +- +- precision = (extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT; +- if (precision >= ARRAY_SIZE(register_precision_table) +- || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID) +- { +- FIXME("Unhandled register precision 
%#x.\n", precision); +- param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID; +- } +- else +- { +- param->precision = register_precision_table[precision]; +- } +- +- if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK) +- param->non_uniform = true; +- +- extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK +- | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK +- | VKD3D_SM4_EXTENDED_OPERAND); +- if (extended) +- FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended); +- } +- else if (type) +- { +- FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type); +- } +- } +- +- order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT; +- +- if (order < 1) +- { +- param->idx[0].offset = ~0u; +- param->idx[0].rel_addr = NULL; +- } +- else +- { +- DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> VKD3D_SM4_ADDRESSING_SHIFT0; +- if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[0]))) +- { +- ERR("Failed to read register index.\n"); +- return false; +- } +- } +- +- if (order < 2) +- { +- param->idx[1].offset = ~0u; +- param->idx[1].rel_addr = NULL; +- } +- else +- { +- DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1; +- if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[1]))) +- { +- ERR("Failed to read register index.\n"); +- return false; +- } +- } +- +- if (order < 3) +- { +- param->idx[2].offset = ~0u; +- param->idx[2].rel_addr = NULL; +- } +- else +- { +- DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2; +- if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[2]))) +- { +- ERR("Failed to read register index.\n"); +- return false; +- } +- } +- +- if (order > 3) +- { +- WARN("Unhandled order %u.\n", order); +- return false; +- } +- +- if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64) +- { +- enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT; +- unsigned int dword_count; +- +- switch (dimension) +- { +- case VKD3D_SM4_DIMENSION_SCALAR: +- param->immconst_type = VKD3D_IMMCONST_SCALAR; +- dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64); +- if (end - *ptr < dword_count) +- { +- WARN("Invalid ptr %p, end %p.\n", *ptr, end); +- return false; +- } +- memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD)); +- *ptr += dword_count; +- break; +- +- case VKD3D_SM4_DIMENSION_VEC4: +- param->immconst_type = VKD3D_IMMCONST_VEC4; +- if (end - *ptr < VKD3D_VEC4_SIZE) +- { +- WARN("Invalid ptr %p, end %p.\n", *ptr, end); +- return false; +- } +- memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD)); +- *ptr += 4; +- break; +- +- default: +- FIXME("Unhandled dimension %#x.\n", dimension); +- break; +- } +- } +- else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type)) +- { +- /* SM5.1 places a symbol identifier in idx[0] and moves +- * other values up one slot. Normalize to SM5.1. 
*/ +- param->idx[2] = param->idx[1]; +- param->idx[1] = param->idx[0]; +- } +- +- map_register(priv, param); +- +- return true; +-} +- +-static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) +-{ +- switch (reg->type) +- { +- case VKD3DSPR_COVERAGE: +- case VKD3DSPR_DEPTHOUT: +- case VKD3DSPR_DEPTHOUTGE: +- case VKD3DSPR_DEPTHOUTLE: +- case VKD3DSPR_GSINSTID: +- case VKD3DSPR_LOCALTHREADINDEX: +- case VKD3DSPR_OUTPOINTID: +- case VKD3DSPR_PRIMID: +- case VKD3DSPR_SAMPLEMASK: +- case VKD3DSPR_OUTSTENCILREF: +- return true; +- default: +- return false; +- } +-} +- +-static uint32_t swizzle_from_sm4(uint32_t s) +-{ +- return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3); +-} +- +-static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, +- const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) ++void dxbc_writer_init(struct dxbc_writer *dxbc) + { +- DWORD token; +- +- if (*ptr >= end) +- { +- WARN("Invalid ptr %p >= end %p.\n", *ptr, end); +- return false; +- } +- token = **ptr; +- +- if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers)) +- { +- ERR("Failed to read parameter.\n"); +- return false; +- } +- +- if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) +- { +- src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; +- } +- else +- { +- enum vkd3d_sm4_swizzle_type swizzle_type = +- (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; +- +- switch (swizzle_type) +- { +- case VKD3D_SM4_SWIZZLE_NONE: +- if (shader_sm4_is_scalar_register(&src_param->reg)) +- src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); +- else +- src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; +- break; +- +- case VKD3D_SM4_SWIZZLE_SCALAR: +- src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; +- src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; +- break; +- +- case VKD3D_SM4_SWIZZLE_VEC4: +- src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); +- break; +- +- default: +- FIXME("Unhandled swizzle type %#x.\n", swizzle_type); +- break; +- } +- } +- +- return true; ++ memset(dxbc, 0, sizeof(*dxbc)); + } + +-static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, +- const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) ++void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) + { +- enum vkd3d_shader_src_modifier modifier; +- DWORD token; +- +- if (*ptr >= end) +- { +- WARN("Invalid ptr %p >= end %p.\n", *ptr, end); +- return false; +- } +- token = **ptr; +- +- if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier)) +- { +- ERR("Failed to read parameter.\n"); +- return false; +- } +- +- if (modifier != VKD3DSPSM_NONE) +- { +- ERR("Invalid source modifier %#x on destination register.\n", modifier); +- return false; +- } ++ struct vkd3d_shader_dxbc_section_desc *section; + +- dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; +- if (data_type == VKD3D_DATA_DOUBLE) +- dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); +- /* Scalar registers are declared with no write mask in shader bytecode. 
*/ +- if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) +- dst_param->write_mask = VKD3DSP_WRITEMASK_0; +- dst_param->modifiers = 0; +- dst_param->shift = 0; ++ assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); + +- return true; ++ section = &dxbc->sections[dxbc->section_count++]; ++ section->tag = tag; ++ section->data.code = data; ++ section->data.size = size; + } + +-static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins) ++int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, ++ struct vkd3d_shader_code *dxbc, char **messages) + { +- enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK; +- +- switch (modifier_type) +- { +- case VKD3D_SM4_MODIFIER_AOFFIMMI: +- { +- static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER +- | VKD3D_SM4_MODIFIER_MASK +- | VKD3D_SM4_AOFFIMMI_U_MASK +- | VKD3D_SM4_AOFFIMMI_V_MASK +- | VKD3D_SM4_AOFFIMMI_W_MASK; +- +- /* Bit fields are used for sign extension. */ +- struct +- { +- int u : 4; +- int v : 4; +- int w : 4; +- } aoffimmi; +- +- if (modifier & ~recognized_bits) +- FIXME("Unhandled instruction modifier %#x.\n", modifier); +- +- aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT; +- aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT; +- aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT; +- ins->texel_offset.u = aoffimmi.u; +- ins->texel_offset.v = aoffimmi.v; +- ins->texel_offset.w = aoffimmi.w; +- break; +- } +- +- case VKD3D_SM5_MODIFIER_DATA_TYPE: +- { +- DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT; +- unsigned int i; +- +- for (i = 0; i < VKD3D_VEC4_SIZE; i++) +- { +- enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); +- +- if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) +- { +- FIXME("Unhandled data type %#x.\n", data_type); +- ins->resource_data_type[i] = VKD3D_DATA_FLOAT; +- } +- else +- { +- ins->resource_data_type[i] = data_type_table[data_type]; +- } +- } +- break; +- } +- +- case VKD3D_SM5_MODIFIER_RESOURCE_TYPE: +- { +- enum vkd3d_sm4_resource_type resource_type +- = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT; +- +- if (resource_type == VKD3D_SM4_RESOURCE_RAW_BUFFER) +- ins->raw = true; +- else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER) +- ins->structured = true; +- +- if (resource_type < ARRAY_SIZE(resource_type_table)) +- ins->resource_type = resource_type_table[resource_type]; +- else +- { +- FIXME("Unhandled resource type %#x.\n", resource_type); +- ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; +- } +- +- ins->resource_stride +- = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT; +- break; +- } ++ size_t size_position, offsets_position, checksum_position, i; ++ struct vkd3d_bytecode_buffer buffer = {0}; ++ uint32_t checksum[4]; + +- default: +- FIXME("Unhandled instruction modifier %#x.\n", modifier); +- } +-} ++ TRACE("section_count %zu, sections %p, dxbc %p, messages %p.\n", section_count, sections, dxbc, messages); + +-static void shader_sm4_read_instruction(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *ins) +-{ +- struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); +- const struct vkd3d_sm4_opcode_info 
*opcode_info; +- uint32_t opcode_token, opcode, previous_token; +- struct vkd3d_shader_dst_param *dst_params; +- struct vkd3d_shader_src_param *src_params; +- const uint32_t **ptr = &parser->ptr; +- unsigned int i, len; +- size_t remaining; +- const uint32_t *p; +- DWORD precise; ++ if (messages) ++ *messages = NULL; + +- if (*ptr >= sm4->end) +- { +- WARN("End of byte-code, failed to read opcode.\n"); +- goto fail; +- } +- remaining = sm4->end - *ptr; ++ put_u32(&buffer, TAG_DXBC); + +- ++parser->location.line; ++ checksum_position = bytecode_get_size(&buffer); ++ for (i = 0; i < 4; ++i) ++ put_u32(&buffer, 0); + +- opcode_token = *(*ptr)++; +- opcode = opcode_token & VKD3D_SM4_OPCODE_MASK; ++ put_u32(&buffer, 1); /* version */ ++ size_position = put_u32(&buffer, 0); ++ put_u32(&buffer, section_count); + +- len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); +- if (!len) +- { +- if (remaining < 2) +- { +- WARN("End of byte-code, failed to read length token.\n"); +- goto fail; +- } +- len = **ptr; +- } +- if (!len || remaining < len) +- { +- WARN("Read invalid length %u (remaining %zu).\n", len, remaining); +- goto fail; +- } +- --len; ++ offsets_position = bytecode_get_size(&buffer); ++ for (i = 0; i < section_count; ++i) ++ put_u32(&buffer, 0); + +- if (!(opcode_info = get_opcode_info(opcode))) ++ for (i = 0; i < section_count; ++i) + { +- FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); +- ins->handler_idx = VKD3DSIH_INVALID; +- *ptr += len; +- return; +- } +- +- ins->handler_idx = opcode_info->handler_idx; +- ins->flags = 0; +- ins->coissue = false; +- ins->raw = false; +- ins->structured = false; +- ins->predicate = NULL; +- ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); +- ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); +- ins->src = src_params = shader_parser_get_src_params(parser, ins->src_count); +- if (!src_params && ins->src_count) +- { +- ERR("Failed to allocate src parameters.\n"); +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); +- ins->handler_idx = VKD3DSIH_INVALID; +- return; ++ set_u32(&buffer, offsets_position + i * sizeof(uint32_t), bytecode_align(&buffer)); ++ put_u32(&buffer, sections[i].tag); ++ put_u32(&buffer, sections[i].data.size); ++ bytecode_put_bytes(&buffer, sections[i].data.code, sections[i].data.size); + } +- ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; +- ins->resource_stride = 0; +- ins->resource_data_type[0] = VKD3D_DATA_FLOAT; +- ins->resource_data_type[1] = VKD3D_DATA_FLOAT; +- ins->resource_data_type[2] = VKD3D_DATA_FLOAT; +- ins->resource_data_type[3] = VKD3D_DATA_FLOAT; +- memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); ++ set_u32(&buffer, size_position, bytecode_get_size(&buffer)); + +- p = *ptr; +- *ptr += len; ++ vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); ++ for (i = 0; i < 4; ++i) ++ set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); + +- if (opcode_info->read_opcode_func) +- { +- ins->dst = NULL; +- ins->dst_count = 0; +- opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4); +- } +- else ++ if (!buffer.status) + { +- enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE; +- +- previous_token = opcode_token; +- while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr) +- shader_sm4_read_instruction_modifier(previous_token = *p++, ins); +- +- ins->flags = (opcode_token & 
VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; +- if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) +- { +- ins->flags &= ~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE; +- instruction_dst_modifier = VKD3DSPDM_SATURATE; +- } +- precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT; +- ins->flags |= precise << VKD3DSI_PRECISE_SHIFT; +- +- ins->dst = dst_params = shader_parser_get_dst_params(parser, ins->dst_count); +- if (!dst_params && ins->dst_count) +- { +- ERR("Failed to allocate dst parameters.\n"); +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); +- ins->handler_idx = VKD3DSIH_INVALID; +- return; +- } +- for (i = 0; i < ins->dst_count; ++i) +- { +- if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), +- &dst_params[i]))) +- { +- ins->handler_idx = VKD3DSIH_INVALID; +- return; +- } +- dst_params[i].modifiers |= instruction_dst_modifier; +- } +- +- for (i = 0; i < ins->src_count; ++i) +- { +- if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), +- &src_params[i]))) +- { +- ins->handler_idx = VKD3DSIH_INVALID; +- return; +- } +- } ++ dxbc->code = buffer.data; ++ dxbc->size = buffer.size; + } +- +- return; +- +-fail: +- *ptr = sm4->end; +- ins->handler_idx = VKD3DSIH_INVALID; +- return; +-} +- +-static bool shader_sm4_is_end(struct vkd3d_shader_parser *parser) +-{ +- struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); +- +- return parser->ptr == sm4->end; ++ return buffer.status; + } + +-static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = +-{ +- .parser_destroy = shader_sm4_destroy, +-}; +- +-static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, +- size_t byte_code_size, const char *source_name, const struct vkd3d_shader_signature *output_signature, +- struct vkd3d_shader_message_context *message_context) ++int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) + { +- struct vkd3d_shader_version version; +- uint32_t version_token, token_count; +- unsigned int i; +- +- if (byte_code_size / sizeof(*byte_code) < 2) +- { +- WARN("Invalid byte code size %lu.\n", (long)byte_code_size); +- return false; +- } +- +- version_token = byte_code[0]; +- TRACE("Version: 0x%08x.\n", version_token); +- token_count = byte_code[1]; +- TRACE("Token count: %u.\n", token_count); +- +- if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count) +- { +- WARN("Invalid token count %u.\n", token_count); +- return false; +- } +- +- sm4->start = &byte_code[2]; +- sm4->end = &byte_code[token_count]; +- +- switch (version_token >> 16) +- { +- case VKD3D_SM4_PS: +- version.type = VKD3D_SHADER_TYPE_PIXEL; +- break; +- +- case VKD3D_SM4_VS: +- version.type = VKD3D_SHADER_TYPE_VERTEX; +- break; +- +- case VKD3D_SM4_GS: +- version.type = VKD3D_SHADER_TYPE_GEOMETRY; +- break; +- +- case VKD3D_SM5_HS: +- version.type = VKD3D_SHADER_TYPE_HULL; +- break; +- +- case VKD3D_SM5_DS: +- version.type = VKD3D_SHADER_TYPE_DOMAIN; +- break; +- +- case VKD3D_SM5_CS: +- version.type = VKD3D_SHADER_TYPE_COMPUTE; +- break; +- +- default: +- FIXME("Unrecognised shader type %#x.\n", version_token >> 16); +- } +- version.major = VKD3D_SM4_VERSION_MAJOR(version_token); +- version.minor = VKD3D_SM4_VERSION_MINOR(version_token); +- +- /* Estimate instruction count to avoid reallocation in most shaders. 
*/ +- if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, +- token_count / 7u + 20)) +- return false; +- sm4->p.ptr = sm4->start; +- +- memset(sm4->output_map, 0xff, sizeof(sm4->output_map)); +- for (i = 0; i < output_signature->element_count; ++i) +- { +- struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; +- +- if (version.type == VKD3D_SHADER_TYPE_PIXEL +- && ascii_strcasecmp(e->semantic_name, "SV_Target")) +- continue; +- if (e->register_index >= ARRAY_SIZE(sm4->output_map)) +- { +- WARN("Invalid output index %u.\n", e->register_index); +- continue; +- } +- +- sm4->output_map[e->register_index] = e->semantic_index; +- } +- +- return true; ++ return vkd3d_shader_serialize_dxbc(dxbc->section_count, dxbc->sections, out, NULL); + } + + static bool require_space(size_t offset, size_t count, size_t size, size_t data_size) +@@ -1928,12 +322,12 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, + } + + static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *section, +- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *s) ++ struct vkd3d_shader_message_context *message_context, struct shader_signature *s) + { + bool has_stream_index, has_min_precision; +- struct vkd3d_shader_signature_element *e; + const char *data = section->data.code; + uint32_t count, header_size; ++ struct signature_element *e; + const char *ptr = data; + unsigned int i; + +@@ -1979,6 +373,8 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s + { + uint32_t name_offset, mask; + ++ e[i].sort_index = i; ++ + if (has_stream_index) + read_dword(&ptr, &e[i].stream_index); + else +@@ -1995,6 +391,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s + read_dword(&ptr, &e[i].sysval_semantic); + read_dword(&ptr, &e[i].component_type); + read_dword(&ptr, &e[i].register_index); ++ e[i].register_count = 1; + read_dword(&ptr, &mask); + e[i].mask = mask & 0xff; + e[i].used_mask = (mask >> 8) & 0xff; +@@ -2029,7 +426,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s + static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, + struct vkd3d_shader_message_context *message_context, void *ctx) + { +- struct vkd3d_shader_signature *is = ctx; ++ struct shader_signature *is = ctx; + + if (section->tag != TAG_ISGN) + return VKD3D_OK; +@@ -2037,13 +434,13 @@ static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, + if (is->elements) + { + FIXME("Multiple input signatures.\n"); +- vkd3d_shader_free_shader_signature(is); ++ shader_signature_cleanup(is); + } + return shader_parse_signature(section, message_context, is); + } + + int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, +- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature) ++ struct vkd3d_shader_message_context *message_context, struct shader_signature *signature) + { + int ret; + +@@ -2122,12 +519,12 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, + + void free_shader_desc(struct vkd3d_shader_desc *desc) + { +- vkd3d_shader_free_shader_signature(&desc->input_signature); +- vkd3d_shader_free_shader_signature(&desc->output_signature); +- vkd3d_shader_free_shader_signature(&desc->patch_constant_signature); ++ shader_signature_cleanup(&desc->input_signature); ++ 
shader_signature_cleanup(&desc->output_signature); ++ shader_signature_cleanup(&desc->patch_constant_signature); + } + +-static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, ++int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) + { + int ret; +@@ -2151,66 +548,6 @@ static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, + return ret; + } + +-int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +-{ +- struct vkd3d_shader_instruction_array *instructions; +- struct vkd3d_shader_desc *shader_desc; +- struct vkd3d_shader_instruction *ins; +- struct vkd3d_shader_sm4_parser *sm4; +- int ret; +- +- if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) +- { +- ERR("Failed to allocate parser.\n"); +- return VKD3D_ERROR_OUT_OF_MEMORY; +- } +- +- shader_desc = &sm4->p.shader_desc; +- if ((ret = shader_extract_from_dxbc(&compile_info->source, +- message_context, compile_info->source_name, shader_desc)) < 0) +- { +- WARN("Failed to extract shader, vkd3d result %d.\n", ret); +- vkd3d_free(sm4); +- return ret; +- } +- +- if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, +- compile_info->source_name, &shader_desc->output_signature, message_context)) +- { +- WARN("Failed to initialise shader parser.\n"); +- free_shader_desc(shader_desc); +- vkd3d_free(sm4); +- return VKD3D_ERROR_INVALID_ARGUMENT; +- } +- +- instructions = &sm4->p.instructions; +- while (!shader_sm4_is_end(&sm4->p)) +- { +- if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) +- { +- ERR("Failed to allocate instructions.\n"); +- vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); +- shader_sm4_destroy(&sm4->p); +- return VKD3D_ERROR_OUT_OF_MEMORY; +- } +- ins = &instructions->elements[instructions->count]; +- shader_sm4_read_instruction(&sm4->p, ins); +- +- if (ins->handler_idx == VKD3DSIH_INVALID) +- { +- WARN("Encountered unrecognized or invalid instruction.\n"); +- shader_sm4_destroy(&sm4->p); +- return VKD3D_ERROR_OUT_OF_MEMORY; +- } +- ++instructions->count; +- } +- +- *parser = &sm4->p; +- +- return VKD3D_OK; +-} +- + /* root signatures */ + #define VKD3D_ROOT_SIGNATURE_1_0_ROOT_DESCRIPTOR_FLAGS VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE + +@@ -2862,7 +1199,7 @@ static int shader_write_root_parameters(struct root_signature_writer_context *co + size_t parameters_position; + unsigned int i; + +- parameters_position = bytecode_get_size(buffer); ++ parameters_position = bytecode_align(buffer); + for (i = 0; i < parameter_count; ++i) + { + put_u32(buffer, versioned_root_signature_get_parameter_type(desc, i)); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 64d6e87065b..5bca84ba38a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -112,8 +112,12 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) + + void hlsl_free_var(struct hlsl_ir_var *decl) + { ++ unsigned int k; ++ + vkd3d_free((void *)decl->name); + hlsl_cleanup_semantic(&decl->semantic); ++ for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) ++ vkd3d_free((void *)decl->objects_usage[k]); + vkd3d_free(decl); + } + +@@ -126,7 +130,7 @@ bool hlsl_type_is_row_major(const struct hlsl_type *type) 
+ + unsigned int hlsl_type_minor_size(const struct hlsl_type *type) + { +- if (type->type != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) ++ if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) + return type->dimx; + else + return type->dimy; +@@ -134,7 +138,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type) + + unsigned int hlsl_type_major_size(const struct hlsl_type *type) + { +- if (type->type != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) ++ if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) + return type->dimy; + else + return type->dimx; +@@ -142,7 +146,7 @@ unsigned int hlsl_type_major_size(const struct hlsl_type *type) + + unsigned int hlsl_type_element_count(const struct hlsl_type *type) + { +- switch (type->type) ++ switch (type->class) + { + case HLSL_CLASS_VECTOR: + return type->dimx; +@@ -157,16 +161,26 @@ unsigned int hlsl_type_element_count(const struct hlsl_type *type) + } + } + +-static unsigned int get_array_size(const struct hlsl_type *type) ++const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type) + { +- if (type->type == HLSL_CLASS_ARRAY) +- return get_array_size(type->e.array.type) * type->e.array.elements_count; ++ if (type->class == HLSL_CLASS_ARRAY) ++ return hlsl_get_multiarray_element_type(type->e.array.type); ++ return type; ++} ++ ++unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type) ++{ ++ if (type->class == HLSL_CLASS_ARRAY) ++ return hlsl_get_multiarray_size(type->e.array.type) * type->e.array.elements_count; + return 1; + } + + bool hlsl_type_is_resource(const struct hlsl_type *type) + { +- if (type->type == HLSL_CLASS_OBJECT) ++ if (type->class == HLSL_CLASS_ARRAY) ++ return hlsl_type_is_resource(type->e.array.type); ++ ++ if (type->class == HLSL_CLASS_OBJECT) + { + switch (type->base_type) + { +@@ -183,10 +197,13 @@ bool hlsl_type_is_resource(const struct hlsl_type *type) + + enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) + { +- if (type->type <= HLSL_CLASS_LAST_NUMERIC) ++ if (type->class <= HLSL_CLASS_LAST_NUMERIC) + return HLSL_REGSET_NUMERIC; + +- if (type->type == HLSL_CLASS_OBJECT) ++ if (type->class == HLSL_CLASS_ARRAY) ++ return hlsl_type_get_regset(type->e.array.type); ++ ++ if (type->class == HLSL_CLASS_OBJECT) + { + switch (type->base_type) + { +@@ -203,8 +220,6 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) + vkd3d_unreachable(); + } + } +- else if (type->type == HLSL_CLASS_ARRAY) +- return hlsl_type_get_regset(type->e.array.type); + + vkd3d_unreachable(); + } +@@ -216,7 +231,8 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int + * (b) the type would cross a vec4 boundary; i.e. a vec3 and a + * vec1 can be packed together, but not a vec3 and a vec2. 
+ */ +- if (type->type > HLSL_CLASS_LAST_NUMERIC || (offset & 3) + type->reg_size[HLSL_REGSET_NUMERIC] > 4) ++ if (type->class == HLSL_CLASS_STRUCT || type->class == HLSL_CLASS_ARRAY ++ || (offset & 3) + type->reg_size[HLSL_REGSET_NUMERIC] > 4) + return align(offset, 4); + return offset; + } +@@ -229,7 +245,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type + for (k = 0; k <= HLSL_REGSET_LAST; ++k) + type->reg_size[k] = 0; + +- switch (type->type) ++ switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: +@@ -278,7 +294,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type + type->reg_size[k] += field->type->reg_size[k]; + } + +- type->dimx += field->type->dimx * field->type->dimy * get_array_size(field->type); ++ type->dimx += field->type->dimx * field->type->dimy * hlsl_get_multiarray_size(field->type); + } + break; + } +@@ -317,7 +333,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e + vkd3d_free(type); + return NULL; + } +- type->type = type_class; ++ type->class = type_class; + type->base_type = base_type; + type->dimx = dimx; + type->dimy = dimy; +@@ -330,7 +346,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e + + static bool type_is_single_component(const struct hlsl_type *type) + { +- return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_OBJECT; ++ return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_OBJECT; + } + + /* Given a type and a component index, this function moves one step through the path required to +@@ -349,7 +365,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, + assert(!type_is_single_component(type)); + assert(index < hlsl_type_component_count(type)); + +- switch (type->type) ++ switch (type->class) + { + case HLSL_CLASS_VECTOR: + assert(index < type->dimx); +@@ -427,7 +443,7 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl + return true; + } + +- if (!(deref->path = hlsl_alloc(ctx, sizeof(*deref->path) * deref->path_len))) ++ if (!(deref->path = hlsl_calloc(ctx, deref->path_len, sizeof(*deref->path)))) + { + deref->var = NULL; + deref->path_len = 0; +@@ -437,6 +453,71 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl + return true; + } + ++bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain) ++{ ++ struct hlsl_ir_index *index; ++ struct hlsl_ir_load *load; ++ unsigned int chain_len, i; ++ struct hlsl_ir_node *ptr; ++ ++ deref->path = NULL; ++ deref->path_len = 0; ++ deref->offset.node = NULL; ++ ++ assert(chain); ++ if (chain->type == HLSL_IR_INDEX) ++ assert(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); ++ ++ /* Find the length of the index chain */ ++ chain_len = 0; ++ ptr = chain; ++ while (ptr->type == HLSL_IR_INDEX) ++ { ++ index = hlsl_ir_index(ptr); ++ ++ chain_len++; ++ ptr = index->val.node; ++ } ++ ++ if (ptr->type != HLSL_IR_LOAD) ++ { ++ hlsl_error(ctx, &chain->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Invalid l-value."); ++ return false; ++ } ++ load = hlsl_ir_load(ptr); ++ ++ if (!init_deref(ctx, deref, load->src.var, load->src.path_len + chain_len)) ++ return false; ++ ++ for (i = 0; i < load->src.path_len; ++i) ++ hlsl_src_from_node(&deref->path[i], load->src.path[i].node); ++ ++ chain_len = 0; ++ ptr = chain; ++ while (ptr->type == HLSL_IR_INDEX) ++ { ++ unsigned int p = deref->path_len - 1 - 
chain_len; ++ ++ index = hlsl_ir_index(ptr); ++ if (hlsl_index_is_noncontiguous(index)) ++ { ++ hlsl_src_from_node(&deref->path[p], deref->path[p + 1].node); ++ hlsl_src_remove(&deref->path[p + 1]); ++ hlsl_src_from_node(&deref->path[p + 1], index->idx.node); ++ } ++ else ++ { ++ hlsl_src_from_node(&deref->path[p], index->idx.node); ++ } ++ ++ chain_len++; ++ ptr = index->val.node; ++ } ++ assert(deref->path_len == load->src.path_len + chain_len); ++ ++ return true; ++} ++ + struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) + { + struct hlsl_type *type; +@@ -459,9 +540,9 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl + { + unsigned int path_len, path_index, deref_path_len, i; + struct hlsl_type *path_type; +- struct hlsl_ir_constant *c; ++ struct hlsl_ir_node *c; + +- list_init(&block->instrs); ++ hlsl_block_init(block); + + path_len = 0; + path_type = hlsl_deref_get_type(ctx, prefix); +@@ -487,12 +568,12 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl + + if (!(c = hlsl_new_uint_constant(ctx, next_index, loc))) + { +- hlsl_free_instr_list(&block->instrs); ++ hlsl_block_cleanup(block); + return false; + } +- list_add_tail(&block->instrs, &c->node.entry); ++ hlsl_block_add_instr(block, c); + +- hlsl_src_from_node(&deref->path[deref_path_len++], &c->node); ++ hlsl_src_from_node(&deref->path[deref_path_len++], c); + } + + assert(deref_path_len == deref->path_len); +@@ -505,7 +586,7 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co + { + assert(idx); + +- switch (type->type) ++ switch (type->class) + { + case HLSL_CLASS_VECTOR: + return hlsl_get_scalar_type(ctx, type->base_type); +@@ -523,8 +604,8 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co + { + struct hlsl_ir_constant *c = hlsl_ir_constant(idx); + +- assert(c->value[0].u < type->e.record.field_count); +- return type->e.record.fields[c->value[0].u].type; ++ assert(c->value.u[0].u < type->e.record.field_count); ++ return type->e.record.fields[c->value.u[0].u].type; + } + + default: +@@ -539,7 +620,7 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + +- type->type = HLSL_CLASS_ARRAY; ++ type->class = HLSL_CLASS_ARRAY; + type->modifiers = basic_type->modifiers; + type->e.array.elements_count = array_size; + type->e.array.type = basic_type; +@@ -559,7 +640,7 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; +- type->type = HLSL_CLASS_STRUCT; ++ type->class = HLSL_CLASS_STRUCT; + type->base_type = HLSL_TYPE_VOID; + type->name = name; + type->dimy = 1; +@@ -579,7 +660,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; +- type->type = HLSL_CLASS_OBJECT; ++ type->class = HLSL_CLASS_OBJECT; + type->base_type = HLSL_TYPE_TEXTURE; + type->dimx = 4; + type->dimy = 1; +@@ -597,7 +678,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim + + if (!(type = vkd3d_calloc(1, sizeof(*type)))) + return NULL; +- type->type = HLSL_CLASS_OBJECT; ++ type->class = HLSL_CLASS_OBJECT; + type->base_type = HLSL_TYPE_UAV; + type->dimx = format->dimx; + type->dimy = 1; +@@ -614,6 +695,8 @@ static const char * get_case_insensitive_typename(const char *name) + { + 
"dword", + "float", ++ "matrix", ++ "vector", + }; + unsigned int i; + +@@ -677,7 +760,7 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha + + unsigned int hlsl_type_component_count(const struct hlsl_type *type) + { +- switch (type->type) ++ switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: +@@ -709,7 +792,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 + if (t1 == t2) + return true; + +- if (t1->type != t2->type) ++ if (t1->class != t2->class) + return false; + if (t1->base_type != t2->base_type) + return false; +@@ -729,7 +812,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 + return false; + if (t1->dimy != t2->dimy) + return false; +- if (t1->type == HLSL_CLASS_STRUCT) ++ if (t1->class == HLSL_CLASS_STRUCT) + { + size_t i; + +@@ -748,7 +831,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 + return false; + } + } +- if (t1->type == HLSL_CLASS_ARRAY) ++ if (t1->class == HLSL_CLASS_ARRAY) + return t1->e.array.elements_count == t2->e.array.elements_count + && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); + +@@ -772,7 +855,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, + return NULL; + } + } +- type->type = old->type; ++ type->class = old->class; + type->base_type = old->base_type; + type->dimx = old->dimx; + type->dimy = old->dimy; +@@ -781,7 +864,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, + type->modifiers |= default_majority; + type->sampler_dim = old->sampler_dim; + type->is_minimum_precision = old->is_minimum_precision; +- switch (old->type) ++ switch (old->class) + { + case HLSL_CLASS_ARRAY: + if (!(type->e.array.type = hlsl_type_clone(ctx, old->e.array.type, default_majority, modifiers))) +@@ -799,7 +882,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, + + type->e.record.field_count = field_count; + +- if (!(type->e.record.fields = hlsl_alloc(ctx, field_count * sizeof(*type->e.record.fields)))) ++ if (!(type->e.record.fields = hlsl_calloc(ctx, field_count, sizeof(*type->e.record.fields)))) + { + vkd3d_free((void *)type->name); + vkd3d_free(type); +@@ -848,40 +931,58 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type) + return true; + } + +-struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, ++struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, + const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *cast; + +- cast = hlsl_new_unary_expr(ctx, HLSL_OP1_CAST, node, *loc); ++ cast = hlsl_new_unary_expr(ctx, HLSL_OP1_CAST, node, loc); + if (cast) + cast->data_type = type; +- return hlsl_ir_expr(cast); ++ return cast; + } + +-struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) ++struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) + { + /* Use a cast to the same type as a makeshift identity expression. 
*/ + return hlsl_new_cast(ctx, node, node->data_type, &node->loc); + } + + struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, +- const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers, ++ const struct vkd3d_shader_location *loc, const struct hlsl_semantic *semantic, unsigned int modifiers, + const struct hlsl_reg_reservation *reg_reservation) + { + struct hlsl_ir_var *var; ++ unsigned int k; + + if (!(var = hlsl_alloc(ctx, sizeof(*var)))) + return NULL; + + var->name = name; + var->data_type = type; +- var->loc = loc; ++ var->loc = *loc; + if (semantic) + var->semantic = *semantic; + var->storage_modifiers = modifiers; + if (reg_reservation) + var->reg_reservation = *reg_reservation; ++ ++ for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) ++ { ++ unsigned int i, obj_count = type->reg_size[k]; ++ ++ if (obj_count == 0) ++ continue; ++ ++ if (!(var->objects_usage[k] = hlsl_calloc(ctx, obj_count, sizeof(*var->objects_usage[0])))) ++ { ++ for (i = 0; i < k; ++i) ++ vkd3d_free(var->objects_usage[i]); ++ vkd3d_free(var); ++ return NULL; ++ } ++ } ++ + return var; + } + +@@ -901,7 +1002,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem + hlsl_release_string_buffer(ctx, string); + return NULL; + } +- var = hlsl_new_var(ctx, name, type, *loc, NULL, 0, NULL); ++ var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); + hlsl_release_string_buffer(ctx, string); + if (var) + list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); +@@ -910,7 +1011,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem + + static bool type_is_single_reg(const struct hlsl_type *type) + { +- return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR; ++ return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_VECTOR; + } + + bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other) +@@ -964,7 +1065,7 @@ static void init_node(struct hlsl_ir_node *node, enum hlsl_ir_node_type type, + list_init(&node->uses); + } + +-struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs) ++struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs) + { + struct hlsl_deref lhs_deref; + +@@ -972,7 +1073,7 @@ struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir + return hlsl_new_store_index(ctx, &lhs_deref, NULL, rhs, 0, &rhs->loc); + } + +-struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, ++struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, + struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_store *store; +@@ -1001,35 +1102,35 @@ struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hl + writemask = (1 << rhs->data_type->dimx) - 1; + store->writemask = writemask; + +- return store; ++ return &store->node; + } + +-struct hlsl_ir_store *hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, ++bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs) + { + struct hlsl_block comp_path_block; + struct hlsl_ir_store *store; + +- list_init(&block->instrs); ++ 
hlsl_block_init(block); + + if (!(store = hlsl_alloc(ctx, sizeof(*store)))) +- return NULL; ++ return false; + init_node(&store->node, HLSL_IR_STORE, NULL, &rhs->loc); + + if (!init_deref_from_component_index(ctx, &comp_path_block, &store->lhs, lhs, comp, &rhs->loc)) + { + vkd3d_free(store); +- return NULL; ++ return false; + } +- list_move_tail(&block->instrs, &comp_path_block.instrs); ++ hlsl_block_add_block(block, &comp_path_block); + hlsl_src_from_node(&store->rhs, rhs); + + if (type_is_single_reg(rhs->data_type)) + store->writemask = (1 << rhs->data_type->dimx) - 1; + +- list_add_tail(&block->instrs, &store->node.entry); ++ hlsl_block_add_instr(block, &store->node); + +- return store; ++ return true; + } + + struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, +@@ -1050,7 +1151,7 @@ struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_typ + { + struct hlsl_ir_constant *c; + +- assert(type->type <= HLSL_CLASS_VECTOR); ++ assert(type->class <= HLSL_CLASS_VECTOR); + + if (!(c = hlsl_alloc(ctx, sizeof(*c)))) + return NULL; +@@ -1060,41 +1161,40 @@ struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_typ + return c; + } + +-struct hlsl_ir_constant *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc) ++struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_constant *c; + + if ((c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), loc))) +- c->value[0].u = b ? ~0u : 0; ++ c->value.u[0].u = b ? ~0u : 0; + +- return c; ++ return &c->node; + } + +-struct hlsl_ir_constant *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, ++struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, + const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_constant *c; + + if ((c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) +- c->value[0].f = f; ++ c->value.u[0].f = f; + +- return c; ++ return &c->node; + } + +-struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n, +- const struct vkd3d_shader_location *loc) ++struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_constant *c; + + c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc); + + if (c) +- c->value[0].i = n; ++ c->value.u[0].i = n; + +- return c; ++ return &c->node; + } + +-struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, ++struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, + const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_constant *c; +@@ -1102,9 +1202,9 @@ struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned i + c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); + + if (c) +- c->value[0].u = n; ++ c->value.u[0].u = n; + +- return c; ++ return &c->node; + } + + struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, +@@ -1124,11 +1224,11 @@ struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op + } + + struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, +- struct hlsl_ir_node *arg, struct vkd3d_shader_location loc) ++ struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node 
*operands[HLSL_MAX_OPERANDS] = {arg};
+
+- return hlsl_new_expr(ctx, op, operands, arg->data_type, &loc);
++ return hlsl_new_expr(ctx, op, operands, arg->data_type, loc);
+ }
+
+ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
+@@ -1140,17 +1240,21 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp
+ return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc);
+ }
+
+-struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc)
++struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition,
++ struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc)
+ {
+ struct hlsl_ir_if *iff;
+
+ if (!(iff = hlsl_alloc(ctx, sizeof(*iff))))
+ return NULL;
+- init_node(&iff->node, HLSL_IR_IF, NULL, &loc);
++ init_node(&iff->node, HLSL_IR_IF, NULL, loc);
+ hlsl_src_from_node(&iff->condition, condition);
+- list_init(&iff->then_instrs.instrs);
+- list_init(&iff->else_instrs.instrs);
+- return iff;
++ hlsl_block_init(&iff->then_block);
++ hlsl_block_add_block(&iff->then_block, then_block);
++ hlsl_block_init(&iff->else_block);
++ if (else_block)
++ hlsl_block_add_block(&iff->else_block, else_block);
++ return &iff->node;
+ }
+
+ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
+@@ -1184,22 +1288,22 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl
+ }
+
+ struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
+- struct vkd3d_shader_location loc)
++ const struct vkd3d_shader_location *loc)
+ {
+ struct hlsl_deref var_deref;
+
+ hlsl_init_simple_deref_from_var(&var_deref, var);
+- return hlsl_new_load_index(ctx, &var_deref, NULL, &loc);
++ return hlsl_new_load_index(ctx, &var_deref, NULL, loc);
+ }
+
+-struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block,
++struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc)
+ {
+ struct hlsl_type *type, *comp_type;
+ struct hlsl_block comp_path_block;
+ struct hlsl_ir_load *load;
+
+- list_init(&block->instrs);
++ hlsl_block_init(block);
+
+ if (!(load = hlsl_alloc(ctx, sizeof(*load))))
+ return NULL;
+@@ -1213,14 +1317,14 @@ struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_b
+ vkd3d_free(load);
+ return NULL;
+ }
+- list_move_tail(&block->instrs, &comp_path_block.instrs);
++ hlsl_block_add_block(block, &comp_path_block);
+
+- list_add_tail(&block->instrs, &load->node.entry);
++ hlsl_block_add_instr(block, &load->node);
+
+- return load;
++ return &load->node;
+ }
+
+-struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx,
++struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx,
+ const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc)
+ {
+ struct hlsl_ir_resource_load *load;
+@@ -1229,24 +1333,36 @@ struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx,
+ return NULL;
+ init_node(&load->node, HLSL_IR_RESOURCE_LOAD, params->format, loc);
+ load->load_type = params->type;
+- if (!hlsl_copy_deref(ctx, &load->resource, &params->resource))
++
++ if (!hlsl_init_deref_from_index_chain(ctx, &load->resource, params->resource))
+ {
+ vkd3d_free(load);
+ return NULL;
+ }
+- if (!hlsl_copy_deref(ctx, &load->sampler, &params->sampler))
++
++ if (params->sampler)
+ {
+- hlsl_cleanup_deref(&load->resource);
+- vkd3d_free(load);
+- return NULL;
++ if (!hlsl_init_deref_from_index_chain(ctx, &load->sampler, params->sampler))
++ {
++ hlsl_cleanup_deref(&load->resource);
++ vkd3d_free(load);
++ return NULL;
++ }
+ }
++
+ hlsl_src_from_node(&load->coords, params->coords);
++ hlsl_src_from_node(&load->sample_index, params->sample_index);
+ hlsl_src_from_node(&load->texel_offset, params->texel_offset);
+ hlsl_src_from_node(&load->lod, params->lod);
+- return load;
++ hlsl_src_from_node(&load->ddx, params->ddx);
++ hlsl_src_from_node(&load->ddy, params->ddy);
++ load->sampling_dim = params->sampling_dim;
++ if (load->sampling_dim == HLSL_SAMPLER_DIM_GENERIC)
++ load->sampling_dim = hlsl_deref_get_type(ctx, &load->resource)->sampler_dim;
++ return &load->node;
+ }
+
+-struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource,
++struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource,
+ struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc)
+ {
+ struct hlsl_ir_resource_store *store;
+@@ -1257,10 +1373,10 @@ struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, con
+ hlsl_copy_deref(ctx, &store->resource, resource);
+ hlsl_src_from_node(&store->coords, coords);
+ hlsl_src_from_node(&store->value, value);
+- return store;
++ return &store->node;
+ }
+
+-struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components,
++struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components,
+ struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc)
+ {
+ struct hlsl_ir_swizzle *swizzle;
+@@ -1275,29 +1391,66 @@ struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned
+ init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc);
+ hlsl_src_from_node(&swizzle->val, val);
+ swizzle->swizzle = s;
+- return swizzle;
++ return &swizzle->node;
++}
++
++bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index)
++{
++ struct hlsl_type *type = index->val.node->data_type;
++
++ return type->class == HLSL_CLASS_MATRIX && !hlsl_type_is_row_major(type);
++}
++
++bool hlsl_index_is_resource_access(struct hlsl_ir_index *index)
++{
++ return index->val.node->data_type->class == HLSL_CLASS_OBJECT;
++}
++
++struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val,
++ struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc)
++{
++ struct hlsl_type *type = val->data_type;
++ struct hlsl_ir_index *index;
++
++ if (!(index = hlsl_alloc(ctx, sizeof(*index))))
++ return NULL;
++
++ if (type->class == HLSL_CLASS_OBJECT)
++ type = type->e.resource_format;
++ else if (type->class == HLSL_CLASS_MATRIX)
++ type = hlsl_get_vector_type(ctx, type->base_type, type->dimx);
++ else
++ type = hlsl_get_element_type_from_path_index(ctx, type, idx);
++
++ init_node(&index->node, HLSL_IR_INDEX, type, loc);
++ hlsl_src_from_node(&index->val, val);
++ hlsl_src_from_node(&index->idx, idx);
++ return &index->node;
+ }
+
+-struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc)
++struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type,
++ const struct vkd3d_shader_location *loc)
+ {
+ struct hlsl_ir_jump *jump;
+
+ if (!(jump = hlsl_alloc(ctx, 
sizeof(*jump)))) + return NULL; +- init_node(&jump->node, HLSL_IR_JUMP, NULL, &loc); ++ init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); + jump->type = type; +- return jump; ++ return &jump->node; + } + +-struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc) ++struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, ++ struct hlsl_block *block, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_loop *loop; + + if (!(loop = hlsl_alloc(ctx, sizeof(*loop)))) + return NULL; +- init_node(&loop->node, HLSL_IR_LOOP, NULL, &loc); +- list_init(&loop->body.instrs); +- return loop; ++ init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); ++ hlsl_block_init(&loop->body); ++ hlsl_block_add_block(&loop->body, block); ++ return &loop->node; + } + + struct clone_instr_map +@@ -1319,11 +1472,13 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, + const struct hlsl_ir_node *src; + struct hlsl_ir_node *dst; + ++ hlsl_block_init(dst_block); ++ + LIST_FOR_EACH_ENTRY(src, &src_block->instrs, struct hlsl_ir_node, entry) + { + if (!(dst = clone_instr(ctx, map, src))) + { +- hlsl_free_instr_list(&dst_block->instrs); ++ hlsl_block_cleanup(dst_block); + return false; + } + list_add_tail(&dst_block->instrs, &dst->entry); +@@ -1332,7 +1487,7 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, + { + if (!vkd3d_array_reserve((void **)&map->instrs, &map->capacity, map->count + 1, sizeof(*map->instrs))) + { +- hlsl_free_instr_list(&dst_block->instrs); ++ hlsl_block_cleanup(dst_block); + return false; + } + +@@ -1394,7 +1549,7 @@ static struct hlsl_ir_node *clone_constant(struct hlsl_ctx *ctx, struct hlsl_ir_ + + if (!(dst = hlsl_new_constant(ctx, src->node.data_type, &src->node.loc))) + return NULL; +- memcpy(dst->value, src->value, sizeof(src->value)); ++ dst->value = src->value; + return &dst->node; + } + +@@ -1411,27 +1566,30 @@ static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_ + + static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_if *src) + { +- struct hlsl_ir_if *dst; ++ struct hlsl_block then_block, else_block; ++ struct hlsl_ir_node *dst; + +- if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), src->node.loc))) ++ if (!clone_block(ctx, &then_block, &src->then_block, map)) + return NULL; ++ if (!clone_block(ctx, &else_block, &src->else_block, map)) ++ { ++ hlsl_block_cleanup(&then_block); ++ return NULL; ++ } + +- if (!clone_block(ctx, &dst->then_instrs, &src->then_instrs, map) +- || !clone_block(ctx, &dst->else_instrs, &src->else_instrs, map)) ++ if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), &then_block, &else_block, &src->node.loc))) + { +- hlsl_free_instr(&dst->node); ++ hlsl_block_cleanup(&then_block); ++ hlsl_block_cleanup(&else_block); + return NULL; + } +- return &dst->node; ++ ++ return dst; + } + + static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) + { +- struct hlsl_ir_jump *dst; +- +- if (!(dst = hlsl_new_jump(ctx, src->type, src->node.loc))) +- return NULL; +- return &dst->node; ++ return hlsl_new_jump(ctx, src->type, &src->node.loc); + } + + static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) +@@ -1452,16 +1610,18 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ + + static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, 
struct hlsl_ir_loop *src) + { +- struct hlsl_ir_loop *dst; ++ struct hlsl_ir_node *dst; ++ struct hlsl_block body; + +- if (!(dst = hlsl_new_loop(ctx, src->node.loc))) ++ if (!clone_block(ctx, &body, &src->body, map)) + return NULL; +- if (!clone_block(ctx, &dst->body, &src->body, map)) ++ ++ if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) + { +- hlsl_free_instr(&dst->node); ++ hlsl_block_cleanup(&body); + return NULL; + } +- return &dst->node; ++ return dst; + } + + static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, +@@ -1486,7 +1646,11 @@ static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, + } + clone_src(map, &dst->coords, &src->coords); + clone_src(map, &dst->lod, &src->lod); ++ clone_src(map, &dst->ddx, &src->ddx); ++ clone_src(map, &dst->ddy, &src->ddy); ++ clone_src(map, &dst->sample_index, &src->sample_index); + clone_src(map, &dst->texel_offset, &src->texel_offset); ++ dst->sampling_dim = src->sampling_dim; + return &dst->node; + } + +@@ -1529,12 +1693,19 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr + static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_swizzle *src) + { +- struct hlsl_ir_swizzle *dst; ++ return hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, ++ map_instr(map, src->val.node), &src->node.loc); ++} + +- if (!(dst = hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, +- map_instr(map, src->val.node), &src->node.loc))) ++static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map, ++ struct hlsl_ir_index *src) ++{ ++ struct hlsl_ir_node *dst; ++ ++ if (!(dst = hlsl_new_index(ctx, map_instr(map, src->val.node), map_instr(map, src->idx.node), ++ &src->node.loc))) + return NULL; +- return &dst->node; ++ return dst; + } + + static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, +@@ -1554,6 +1725,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + case HLSL_IR_IF: + return clone_if(ctx, map, hlsl_ir_if(instr)); + ++ case HLSL_IR_INDEX: ++ return clone_index(ctx, map, hlsl_ir_index(instr)); ++ + case HLSL_IR_JUMP: + return clone_jump(ctx, hlsl_ir_jump(instr)); + +@@ -1593,13 +1767,12 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, + struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, + const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc) + { ++ struct hlsl_ir_node *constant, *store; + struct hlsl_ir_function_decl *decl; +- struct hlsl_ir_constant *constant; +- struct hlsl_ir_store *store; + + if (!(decl = hlsl_alloc(ctx, sizeof(*decl)))) + return NULL; +- list_init(&decl->body.instrs); ++ hlsl_block_init(&decl->body); + decl->return_type = return_type; + decl->parameters = *parameters; + decl->loc = *loc; +@@ -1620,17 +1793,17 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, + + if (!(constant = hlsl_new_bool_constant(ctx, false, loc))) + return decl; +- list_add_tail(&decl->body.instrs, &constant->node.entry); ++ hlsl_block_add_instr(&decl->body, constant); + +- if (!(store = hlsl_new_simple_store(ctx, decl->early_return_var, &constant->node))) ++ if (!(store = hlsl_new_simple_store(ctx, decl->early_return_var, constant))) + return decl; +- list_add_tail(&decl->body.instrs, &store->node.entry); ++ hlsl_block_add_instr(&decl->body, store); + + return decl; + } + + struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const 
char *name, +- const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc) ++ const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) + { + struct hlsl_buffer *buffer; + +@@ -1640,7 +1813,7 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type + buffer->name = name; + if (reservation) + buffer->reservation = *reservation; +- buffer->loc = loc; ++ buffer->loc = *loc; + list_add_tail(&ctx->buffers, &buffer->entry); + return buffer; + } +@@ -1698,10 +1871,10 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls + { + int r; + +- if ((r = vkd3d_u32_compare(t1->type, t2->type))) ++ if ((r = vkd3d_u32_compare(t1->class, t2->class))) + { +- if (!((t1->type == HLSL_CLASS_SCALAR && t2->type == HLSL_CLASS_VECTOR) +- || (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_SCALAR))) ++ if (!((t1->class == HLSL_CLASS_SCALAR && t2->class == HLSL_CLASS_VECTOR) ++ || (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_SCALAR))) + return r; + } + if ((r = vkd3d_u32_compare(t1->base_type, t2->base_type))) +@@ -1718,7 +1891,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls + return r; + if ((r = vkd3d_u32_compare(t1->dimy, t2->dimy))) + return r; +- if (t1->type == HLSL_CLASS_STRUCT) ++ if (t1->class == HLSL_CLASS_STRUCT) + { + size_t i; + +@@ -1738,7 +1911,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls + } + return 0; + } +- if (t1->type == HLSL_CLASS_ARRAY) ++ if (t1->class == HLSL_CLASS_ARRAY) + { + if ((r = vkd3d_u32_compare(t1->e.array.elements_count, t2->e.array.elements_count))) + return r; +@@ -1768,7 +1941,7 @@ static int compare_function_decl_rb(const void *key, const struct rb_entry *entr + + struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) + { +- struct vkd3d_string_buffer *string; ++ struct vkd3d_string_buffer *string, *inner_string; + + static const char *const base_types[] = + { +@@ -1789,7 +1962,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + return string; + } + +- switch (type->type) ++ switch (type->class) + { + case HLSL_CLASS_SCALAR: + assert(type->base_type < ARRAY_SIZE(base_types)); +@@ -1808,10 +1981,9 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + + case HLSL_CLASS_ARRAY: + { +- struct vkd3d_string_buffer *inner_string; + const struct hlsl_type *t; + +- for (t = type; t->type == HLSL_CLASS_ARRAY; t = t->e.array.type) ++ for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) + ; + + if ((inner_string = hlsl_type_to_string(ctx, t))) +@@ -1820,7 +1992,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + hlsl_release_string_buffer(ctx, inner_string); + } + +- for (t = type; t->type == HLSL_CLASS_ARRAY; t = t->e.array.type) ++ for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) + { + if (t->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) + vkd3d_string_buffer_printf(string, "[]"); +@@ -1860,13 +2032,26 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + + assert(type->sampler_dim < ARRAY_SIZE(dimensions)); + assert(type->e.resource_format->base_type < ARRAY_SIZE(base_types)); +- vkd3d_string_buffer_printf(string, "Texture%s<%s%u>", dimensions[type->sampler_dim], +- base_types[type->e.resource_format->base_type], type->e.resource_format->dimx); ++ 
vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); ++ if ((inner_string = hlsl_type_to_string(ctx, type->e.resource_format))) ++ { ++ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); ++ hlsl_release_string_buffer(ctx, inner_string); ++ } + return string; + + case HLSL_TYPE_UAV: +- vkd3d_string_buffer_printf(string, "RWTexture%s<%s%u>", dimensions[type->sampler_dim], +- base_types[type->e.resource_format->base_type], type->e.resource_format->dimx); ++ if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) ++ vkd3d_string_buffer_printf(string, "RWBuffer"); ++ else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) ++ vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); ++ else ++ vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); ++ if ((inner_string = hlsl_type_to_string(ctx, type->e.resource_format))) ++ { ++ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); ++ hlsl_release_string_buffer(ctx, inner_string); ++ } + return string; + + default: +@@ -1943,6 +2128,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) + "HLSL_IR_CONSTANT", + "HLSL_IR_EXPR", + "HLSL_IR_IF", ++ "HLSL_IR_INDEX", + "HLSL_IR_LOAD", + "HLSL_IR_LOOP", + "HLSL_IR_JUMP", +@@ -2107,7 +2293,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl + vkd3d_string_buffer_printf(buffer, "{"); + for (x = 0; x < type->dimx; ++x) + { +- const union hlsl_constant_value *value = &constant->value[x]; ++ const union hlsl_constant_value_component *value = &constant->value.u[x]; + + switch (type->base_type) + { +@@ -2168,6 +2354,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP1_SIN] = "sin", + [HLSL_OP1_SIN_REDUCED] = "sin_reduced", + [HLSL_OP1_SQRT] = "sqrt", ++ [HLSL_OP1_TRUNC] = "trunc", + + [HLSL_OP2_ADD] = "+", + [HLSL_OP2_BIT_AND] = "&", +@@ -2214,9 +2401,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + vkd3d_string_buffer_printf(buffer, "if ("); + dump_src(buffer, &if_node->condition); + vkd3d_string_buffer_printf(buffer, ") {\n"); +- dump_instr_list(ctx, buffer, &if_node->then_instrs.instrs); ++ dump_instr_list(ctx, buffer, &if_node->then_block.instrs); + vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); +- dump_instr_list(ctx, buffer, &if_node->else_instrs.instrs); ++ dump_instr_list(ctx, buffer, &if_node->else_block.instrs); + vkd3d_string_buffer_printf(buffer, " %10s }", ""); + } + +@@ -2256,6 +2443,8 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru + [HLSL_RESOURCE_LOAD] = "load_resource", + [HLSL_RESOURCE_SAMPLE] = "sample", + [HLSL_RESOURCE_SAMPLE_LOD] = "sample_lod", ++ [HLSL_RESOURCE_SAMPLE_LOD_BIAS] = "sample_biased", ++ [HLSL_RESOURCE_SAMPLE_GRAD] = "sample_grad", + [HLSL_RESOURCE_GATHER_RED] = "gather_red", + [HLSL_RESOURCE_GATHER_GREEN] = "gather_green", + [HLSL_RESOURCE_GATHER_BLUE] = "gather_blue", +@@ -2269,6 +2458,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru + dump_deref(buffer, &load->sampler); + vkd3d_string_buffer_printf(buffer, ", coords = "); + dump_src(buffer, &load->coords); ++ if (load->sample_index.node) ++ { ++ vkd3d_string_buffer_printf(buffer, ", sample index = "); ++ dump_src(buffer, &load->sample_index); ++ } + if (load->texel_offset.node) + { + vkd3d_string_buffer_printf(buffer, ", offset = "); +@@ -2279,6 +2473,16 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru + 
vkd3d_string_buffer_printf(buffer, ", lod = "); + dump_src(buffer, &load->lod); + } ++ if (load->ddx.node) ++ { ++ vkd3d_string_buffer_printf(buffer, ", ddx = "); ++ dump_src(buffer, &load->ddx); ++ } ++ if (load->ddy.node) ++ { ++ vkd3d_string_buffer_printf(buffer, ", ddy = "); ++ dump_src(buffer, &load->ddy); ++ } + vkd3d_string_buffer_printf(buffer, ")"); + } + +@@ -2321,6 +2525,14 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls + } + } + ++static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_index *index) ++{ ++ dump_src(buffer, &index->val); ++ vkd3d_string_buffer_printf(buffer, "[idx:"); ++ dump_src(buffer, &index->idx); ++ vkd3d_string_buffer_printf(buffer, "]"); ++} ++ + static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr) + { + if (instr->index) +@@ -2348,6 +2560,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + dump_ir_if(ctx, buffer, hlsl_ir_if(instr)); + break; + ++ case HLSL_IR_INDEX: ++ dump_ir_index(buffer, hlsl_ir_index(instr)); ++ break; ++ + case HLSL_IR_JUMP: + dump_ir_jump(buffer, hlsl_ir_jump(instr)); + break; +@@ -2421,7 +2637,7 @@ void hlsl_free_type(struct hlsl_type *type) + size_t i; + + vkd3d_free((void *)type->name); +- if (type->type == HLSL_CLASS_STRUCT) ++ if (type->class == HLSL_CLASS_STRUCT) + { + for (i = 0; i < type->e.record.field_count; ++i) + { +@@ -2447,6 +2663,11 @@ void hlsl_free_instr_list(struct list *list) + hlsl_free_instr(node); + } + ++void hlsl_block_cleanup(struct hlsl_block *block) ++{ ++ hlsl_free_instr_list(&block->instrs); ++} ++ + static void free_ir_call(struct hlsl_ir_call *call) + { + vkd3d_free(call); +@@ -2468,8 +2689,8 @@ static void free_ir_expr(struct hlsl_ir_expr *expr) + + static void free_ir_if(struct hlsl_ir_if *if_node) + { +- hlsl_free_instr_list(&if_node->then_instrs.instrs); +- hlsl_free_instr_list(&if_node->else_instrs.instrs); ++ hlsl_block_cleanup(&if_node->then_block); ++ hlsl_block_cleanup(&if_node->else_block); + hlsl_src_remove(&if_node->condition); + vkd3d_free(if_node); + } +@@ -2487,7 +2708,7 @@ static void free_ir_load(struct hlsl_ir_load *load) + + static void free_ir_loop(struct hlsl_ir_loop *loop) + { +- hlsl_free_instr_list(&loop->body.instrs); ++ hlsl_block_cleanup(&loop->body); + vkd3d_free(loop); + } + +@@ -2497,7 +2718,10 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) + hlsl_cleanup_deref(&load->resource); + hlsl_src_remove(&load->coords); + hlsl_src_remove(&load->lod); ++ hlsl_src_remove(&load->ddx); ++ hlsl_src_remove(&load->ddy); + hlsl_src_remove(&load->texel_offset); ++ hlsl_src_remove(&load->sample_index); + vkd3d_free(load); + } + +@@ -2522,6 +2746,13 @@ static void free_ir_swizzle(struct hlsl_ir_swizzle *swizzle) + vkd3d_free(swizzle); + } + ++static void free_ir_index(struct hlsl_ir_index *index) ++{ ++ hlsl_src_remove(&index->val); ++ hlsl_src_remove(&index->idx); ++ vkd3d_free(index); ++} ++ + void hlsl_free_instr(struct hlsl_ir_node *node) + { + assert(list_empty(&node->uses)); +@@ -2544,6 +2775,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) + free_ir_if(hlsl_ir_if(node)); + break; + ++ case HLSL_IR_INDEX: ++ free_ir_index(hlsl_ir_index(node)); ++ break; ++ + case HLSL_IR_JUMP: + free_ir_jump(hlsl_ir_jump(node)); + break; +@@ -2600,7 +2835,7 @@ static void free_function_decl(struct hlsl_ir_function_decl *decl) + vkd3d_free((void *)decl->attrs); + + vkd3d_free(decl->parameters.vars); +- 
hlsl_free_instr_list(&decl->body.instrs); ++ hlsl_block_cleanup(&decl->body); + vkd3d_free(decl); + } + +@@ -2844,8 +3079,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + { + {"dword", HLSL_CLASS_SCALAR, HLSL_TYPE_UINT, 1, 1}, + {"float", HLSL_CLASS_SCALAR, HLSL_TYPE_FLOAT, 1, 1}, +- {"VECTOR", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, +- {"MATRIX", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, ++ {"vector", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, ++ {"matrix", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, + {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, + {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, + {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, +@@ -2999,10 +3234,10 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, + list_init(&ctx->buffers); + + if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, +- hlsl_strdup(ctx, "$Globals"), NULL, ctx->location))) ++ hlsl_strdup(ctx, "$Globals"), NULL, &ctx->location))) + return false; + if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, +- hlsl_strdup(ctx, "$Params"), NULL, ctx->location))) ++ hlsl_strdup(ctx, "$Params"), NULL, &ctx->location))) + return false; + ctx->cur_buffer = ctx->globals_buffer; + +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index ccbf22a5801..376191b9ff3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -21,10 +21,12 @@ + #define __VKD3D_SHADER_HLSL_H + + #include "vkd3d_shader_private.h" +-#include "wine/rbtree.h" ++#include "rbtree.h" + #include "d3dcommon.h" + #include "d3dx9shader.h" +-#include "sm4.h" ++ ++enum vkd3d_sm4_register_type; ++enum vkd3d_sm4_swizzle_type; + + /* The general IR structure is inspired by Mesa GLSL hir, even though the code + * ends up being quite different in practice. Anyway, here comes the relevant +@@ -102,18 +104,21 @@ enum hlsl_base_type + + enum hlsl_sampler_dim + { +- HLSL_SAMPLER_DIM_GENERIC, +- HLSL_SAMPLER_DIM_1D, +- HLSL_SAMPLER_DIM_2D, +- HLSL_SAMPLER_DIM_3D, +- HLSL_SAMPLER_DIM_CUBE, +- HLSL_SAMPLER_DIM_LAST_SAMPLER = HLSL_SAMPLER_DIM_CUBE, +- HLSL_SAMPLER_DIM_1DARRAY, +- HLSL_SAMPLER_DIM_2DARRAY, +- HLSL_SAMPLER_DIM_2DMS, +- HLSL_SAMPLER_DIM_2DMSARRAY, +- HLSL_SAMPLER_DIM_CUBEARRAY, +- HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_CUBEARRAY, ++ HLSL_SAMPLER_DIM_GENERIC, ++ HLSL_SAMPLER_DIM_1D, ++ HLSL_SAMPLER_DIM_2D, ++ HLSL_SAMPLER_DIM_3D, ++ HLSL_SAMPLER_DIM_CUBE, ++ HLSL_SAMPLER_DIM_LAST_SAMPLER = HLSL_SAMPLER_DIM_CUBE, ++ HLSL_SAMPLER_DIM_1DARRAY, ++ HLSL_SAMPLER_DIM_2DARRAY, ++ HLSL_SAMPLER_DIM_2DMS, ++ HLSL_SAMPLER_DIM_2DMSARRAY, ++ HLSL_SAMPLER_DIM_CUBEARRAY, ++ HLSL_SAMPLER_DIM_LAST_TEXTURE = HLSL_SAMPLER_DIM_CUBEARRAY, ++ HLSL_SAMPLER_DIM_BUFFER, ++ HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, ++ HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, + }; + + enum hlsl_regset +@@ -134,16 +139,17 @@ struct hlsl_type + /* Item entry in hlsl_scope->types. hlsl_type->name is used as key (if not NULL). */ + struct rb_entry scope_entry; + +- enum hlsl_type_class type; ++ enum hlsl_type_class class; + /* If type is <= HLSL_CLASS_LAST_NUMERIC, then base_type is <= HLSL_TYPE_LAST_SCALAR. + * If type is HLSL_CLASS_OBJECT, then base_type is > HLSL_TYPE_LAST_SCALAR. + * Otherwise, base_type is not used. */ + enum hlsl_base_type base_type; + + /* If base_type is HLSL_TYPE_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. 
+- * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim can have any value of the enum.
+- * If base_type is HLSL_TYPE_UAV, them sampler_dim must be one of HLSL_SAMPLER_DIM_1D,
+- * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, or HLSL_SAMPLER_DIM_2DARRAY.
++ * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_TEXTURE.
++ * If base_type is HLSL_TYPE_UAV, then sampler_dim must be one of HLSL_SAMPLER_DIM_1D,
++ * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, HLSL_SAMPLER_DIM_2DARRAY,
++ * HLSL_SAMPLER_DIM_BUFFER, or HLSL_SAMPLER_DIM_STRUCTURED_BUFFER.
+  * Otherwise, sampler_dim is not used */
+     enum hlsl_sampler_dim sampler_dim;
+     /* Name, in case the type is a named struct or a typedef. */
+@@ -207,6 +213,16 @@ struct hlsl_semantic
+ {
+     const char *name;
+     uint32_t index;
++
++    /* If the variable or field that stores this hlsl_semantic has already reported that it is missing. */
++    bool reported_missing;
++    /* In case the variable or field that stores this semantic has already reported to use a
++     * duplicated output semantic, this value stores the last reported index + 1. Otherwise it is 0. */
++    uint32_t reported_duplicated_output_next_index;
++    /* In case the variable or field that stores this semantic has already reported to use a
++     * duplicated input semantic with incompatible values, this value stores the last reported
++     * index + 1. Otherwise it is 0. */
++    uint32_t reported_duplicated_input_incompatible_next_index;
+ };
+ 
+ /* A field within a struct type declaration, used in hlsl_type.e.fields. */
+@@ -228,16 +244,21 @@ struct hlsl_struct_field
+     size_t name_bytecode_offset;
+ };
+ 
+-/* Information of the register allocated for an instruction node or variable.
++/* Information of the register(s) allocated for an instruction node or variable.
+  * These values are initialized at the end of hlsl_emit_bytecode(), after the compilation passes,
+  * just before writing the bytecode.
+- * For numeric registers, a writemask can be provided to indicate the reservation of only some of the
+- * 4 components.
+  * The type of register (register class) is implied from its use, so it is not stored in this
+  * struct. */
+ struct hlsl_reg
+ {
++    /* Index of the first register allocated. */
+     uint32_t id;
++    /* Number of registers to be allocated.
++     * Unlike the variable's type's regsize, it is not expressed in register components, but rather
++     * in whole registers, and may depend on which components are used within the shader. */
++    uint32_t bind_count;
++    /* For numeric registers, a writemask can be provided to indicate the reservation of only some
++     * of the 4 components. */
+     unsigned int writemask;
+     /* Whether the register has been allocated. */
+     bool allocated;
+@@ -254,6 +275,7 @@ enum hlsl_ir_node_type
+     HLSL_IR_CONSTANT,
+     HLSL_IR_EXPR,
+     HLSL_IR_IF,
++    HLSL_IR_INDEX,
+     HLSL_IR_LOAD,
+     HLSL_IR_LOOP,
+     HLSL_IR_JUMP,
+@@ -342,12 +364,17 @@ struct hlsl_attribute
+ 
+ #define HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT 0
+ 
+-/* Reservation of a specific register to a variable, field, or buffer, written in the HLSL source
+- * using the register(·) syntax */
++/* Reservation of a register and/or an offset for objects inside constant buffers, to be used as a
++ * starting point of their allocation. They are available through the register(·) and the
++ * packoffset(·) syntaxes, respectively.
++ * The constant buffer offset is measured in register components.
*/ + struct hlsl_reg_reservation + { +- char type; +- unsigned int index; ++ char reg_type; ++ unsigned int reg_index; ++ ++ char offset_type; ++ unsigned int offset_index; + }; + + struct hlsl_ir_var +@@ -360,8 +387,7 @@ struct hlsl_ir_var + struct hlsl_buffer *buffer; + /* Bitfield for storage modifiers (type modifiers are stored in data_type->modifiers). */ + unsigned int storage_modifiers; +- /* Optional register to be used as a starting point for the variable allocation, specified +- * by the user via the register(·) syntax. */ ++ /* Optional reservations of registers and/or offsets for variables within constant buffers. */ + struct hlsl_reg_reservation reg_reservation; + + /* Item entry in hlsl_scope.vars. Specifically hlsl_ctx.globals.vars if the variable is global. */ +@@ -384,6 +410,13 @@ struct hlsl_ir_var + * and the buffer_offset instead. */ + struct hlsl_reg regs[HLSL_REGSET_LAST + 1]; + ++ struct ++ { ++ bool used; ++ enum hlsl_sampler_dim sampler_dim; ++ struct vkd3d_shader_location first_sampler_dim_loc; ++ } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; ++ + uint32_t is_input_semantic : 1; + uint32_t is_output_semantic : 1; + uint32_t is_uniform : 1; +@@ -446,8 +479,8 @@ struct hlsl_ir_if + { + struct hlsl_ir_node node; + struct hlsl_src condition; +- struct hlsl_block then_instrs; +- struct hlsl_block else_instrs; ++ struct hlsl_block then_block; ++ struct hlsl_block else_block; + }; + + struct hlsl_ir_loop +@@ -485,6 +518,7 @@ enum hlsl_ir_expr_op + HLSL_OP1_SIN, + HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi] */ + HLSL_OP1_SQRT, ++ HLSL_OP1_TRUNC, + + HLSL_OP2_ADD, + HLSL_OP2_BIT_AND, +@@ -540,6 +574,12 @@ struct hlsl_ir_swizzle + DWORD swizzle; + }; + ++struct hlsl_ir_index ++{ ++ struct hlsl_ir_node node; ++ struct hlsl_src val, idx; ++}; ++ + /* Reference to a variable, or a part of it (e.g. a vector within a matrix within a struct). */ + struct hlsl_deref + { +@@ -575,6 +615,8 @@ enum hlsl_resource_load_type + HLSL_RESOURCE_LOAD, + HLSL_RESOURCE_SAMPLE, + HLSL_RESOURCE_SAMPLE_LOD, ++ HLSL_RESOURCE_SAMPLE_LOD_BIAS, ++ HLSL_RESOURCE_SAMPLE_GRAD, + HLSL_RESOURCE_GATHER_RED, + HLSL_RESOURCE_GATHER_GREEN, + HLSL_RESOURCE_GATHER_BLUE, +@@ -586,7 +628,8 @@ struct hlsl_ir_resource_load + struct hlsl_ir_node node; + enum hlsl_resource_load_type load_type; + struct hlsl_deref resource, sampler; +- struct hlsl_src coords, lod, texel_offset; ++ struct hlsl_src coords, lod, ddx, ddy, sample_index, texel_offset; ++ enum hlsl_sampler_dim sampling_dim; + }; + + struct hlsl_ir_resource_store +@@ -607,13 +650,16 @@ struct hlsl_ir_store + struct hlsl_ir_constant + { + struct hlsl_ir_node node; +- union hlsl_constant_value ++ struct hlsl_constant_value + { +- uint32_t u; +- int32_t i; +- float f; +- double d; +- } value[4]; ++ union hlsl_constant_value_component ++ { ++ uint32_t u; ++ int32_t i; ++ float f; ++ double d; ++ } u[4]; ++ } value; + /* Constant register of type 'c' where the constant value is stored for SM1. */ + struct hlsl_reg reg; + }; +@@ -674,6 +720,9 @@ struct hlsl_buffer + unsigned size, used_size; + /* Register of type 'b' on which the buffer is allocated. 
*/ + struct hlsl_reg reg; ++ ++ bool manually_packed_elements; ++ bool automatically_packed_elements; + }; + + struct hlsl_ctx +@@ -780,8 +829,9 @@ struct hlsl_resource_load_params + { + struct hlsl_type *format; + enum hlsl_resource_load_type type; +- struct hlsl_deref resource, sampler; +- struct hlsl_ir_node *coords, *lod, *texel_offset; ++ struct hlsl_ir_node *resource, *sampler; ++ struct hlsl_ir_node *coords, *lod, *ddx, *ddy, *sample_index, *texel_offset; ++ enum hlsl_sampler_dim sampling_dim; + }; + + static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) +@@ -850,6 +900,27 @@ static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node + return CONTAINING_RECORD(node, struct hlsl_ir_swizzle, node); + } + ++static inline struct hlsl_ir_index *hlsl_ir_index(const struct hlsl_ir_node *node) ++{ ++ assert(node->type == HLSL_IR_INDEX); ++ return CONTAINING_RECORD(node, struct hlsl_ir_index, node); ++} ++ ++static inline void hlsl_block_init(struct hlsl_block *block) ++{ ++ list_init(&block->instrs); ++} ++ ++static inline void hlsl_block_add_instr(struct hlsl_block *block, struct hlsl_ir_node *instr) ++{ ++ list_add_tail(&block->instrs, &instr->entry); ++} ++ ++static inline void hlsl_block_add_block(struct hlsl_block *block, struct hlsl_block *add) ++{ ++ list_move_tail(&block->instrs, &add->instrs); ++} ++ + static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node) + { + src->node = node; +@@ -873,6 +944,15 @@ static inline void *hlsl_alloc(struct hlsl_ctx *ctx, size_t size) + return ptr; + } + ++static inline void *hlsl_calloc(struct hlsl_ctx *ctx, size_t count, size_t size) ++{ ++ void *ptr = vkd3d_calloc(count, size); ++ ++ if (!ptr) ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return ptr; ++} ++ + static inline void *hlsl_realloc(struct hlsl_ctx *ctx, void *ptr, size_t size) + { + void *ret = vkd3d_realloc(ptr, size); +@@ -948,6 +1028,8 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) + switch (dim) + { + case HLSL_SAMPLER_DIM_1D: ++ case HLSL_SAMPLER_DIM_BUFFER: ++ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + return 1; + case HLSL_SAMPLER_DIM_1DARRAY: + case HLSL_SAMPLER_DIM_2D: +@@ -974,11 +1056,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); + const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type); + +-struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, ++struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false); + void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); + bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var); + ++void hlsl_block_cleanup(struct hlsl_block *block); + bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); + + void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); +@@ -986,6 +1069,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); + ++bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, 
struct hlsl_deref *deref, struct hlsl_ir_node *chain); + bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); + + void hlsl_cleanup_deref(struct hlsl_deref *deref); +@@ -1012,64 +1096,71 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type); + struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size); + struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, + struct hlsl_ir_node *arg2); +-struct hlsl_ir_constant *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); + struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, +- const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc); ++ const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, + const struct vkd3d_shader_location *loc); +-struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, ++struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, + const struct vkd3d_shader_location *loc); + struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, + const struct vkd3d_shader_location *loc); +-struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node); ++struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node); + struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], + struct hlsl_type *data_type, const struct vkd3d_shader_location *loc); +-struct hlsl_ir_constant *hlsl_new_float_constant(struct hlsl_ctx *ctx, ++struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, + float f, const struct vkd3d_shader_location *loc); + struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, + struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, + const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc); +-struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc); +-struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n, +- const struct vkd3d_shader_location *loc); +-struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc); ++struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, ++ struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, ++ enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc); + + void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); + + struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, +- struct vkd3d_shader_location loc); ++ const struct vkd3d_shader_location *loc); + struct hlsl_ir_load 
*hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); +-struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, ++struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); + +-struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); +-struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, ++struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); ++struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, + struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc); +-struct hlsl_ir_store *hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, ++bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs); + +-struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc); +-struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, ++bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); ++bool hlsl_index_is_resource_access(struct hlsl_ir_index *index); ++ ++struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, ++ struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, ++ struct hlsl_block *block, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, + const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); +-struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, ++struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, + struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); + struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, + struct hlsl_struct_field *fields, size_t field_count); +-struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, ++struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, + struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); + struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, + struct hlsl_type *type, const struct vkd3d_shader_location *loc); + struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, + unsigned int sample_count); + struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format); +-struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, ++struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, + const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, +- struct vkd3d_shader_location loc); ++ const struct vkd3d_shader_location *loc); + 
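/* A minimal sketch, not taken from the diff itself, of how callers compose the
 * constructors after this signature rework: the helpers now return the base
 * struct hlsl_ir_node and take the source location by const pointer, so results
 * feed hlsl_block_add_instr() directly instead of going through a &foo->node
 * detour. emit_trunc() is a hypothetical helper used only for illustration. */
static bool emit_trunc(struct hlsl_ctx *ctx, struct hlsl_block *block,
        float f, const struct vkd3d_shader_location *loc)
{
    struct hlsl_ir_node *c, *trunc;

    /* Both constructors return NULL on allocation failure. */
    if (!(c = hlsl_new_float_constant(ctx, f, loc)))
        return false;
    hlsl_block_add_instr(block, c);

    /* HLSL_OP1_TRUNC is the truncation op introduced by this update. */
    if (!(trunc = hlsl_new_unary_expr(ctx, HLSL_OP1_TRUNC, c, loc)))
        return false;
    hlsl_block_add_instr(block, trunc);

    return true;
}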
struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, +- const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers, ++ const struct vkd3d_shader_location *loc, const struct hlsl_semantic *semantic, unsigned int modifiers, + const struct hlsl_reg_reservation *reg_reservation); + + void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, +@@ -1101,6 +1192,9 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type); + unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); + bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); + ++const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type); ++unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type); ++ + unsigned int hlsl_combine_swizzles(unsigned int first, unsigned int second, unsigned int dim); + unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second); + unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask); +@@ -1109,12 +1203,16 @@ unsigned int hlsl_swizzle_from_writemask(unsigned int writemask); + struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); + bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + unsigned int *start, unsigned int *count); ++bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, ++ enum hlsl_regset regset, unsigned int *index); + bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset); + unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); + struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); + + bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); + bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); ++bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), ++ struct hlsl_block *block, void *context); + + bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); +@@ -1124,7 +1222,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun + bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, + const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); + bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); ++ bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); + int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); + + int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +index adff1da04d8..e9ae3ccf3d3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +@@ -37,6 +37,7 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); + %option bison-locations + %option extra-type="struct hlsl_ctx *" + 
%option never-interactive ++%option nodefault + %option noinput + %option nounput + %option noyywrap +@@ -95,6 +96,7 @@ matrix {return KW_MATRIX; } + namespace {return KW_NAMESPACE; } + nointerpolation {return KW_NOINTERPOLATION; } + out {return KW_OUT; } ++packoffset {return KW_PACKOFFSET; } + pass {return KW_PASS; } + PixelShader {return KW_PIXELSHADER; } + precise {return KW_PRECISE; } +@@ -102,6 +104,8 @@ RasterizerState {return KW_RASTERIZERSTATE; } + RenderTargetView {return KW_RENDERTARGETVIEW; } + return {return KW_RETURN; } + register {return KW_REGISTER; } ++RWBuffer {return KW_RWBUFFER; } ++RWStructuredBuffer {return KW_RWSTRUCTUREDBUFFER; } + RWTexture1D {return KW_RWTEXTURE1D; } + RWTexture2D {return KW_RWTEXTURE2D; } + RWTexture3D {return KW_RWTEXTURE3D; } +@@ -265,6 +269,10 @@ row_major {return KW_ROW_MAJOR; } + return STRING; + } + {WS}+ {} ++{ANY} { ++ FIXME("Malformed preprocessor line directive?\n"); ++ BEGIN(INITIAL); ++ } + {NEWLINE} { + FIXME("Malformed preprocessor line directive?\n"); + BEGIN(INITIAL); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index fd1eaf6ec95..dae1851c7ad 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -85,8 +85,8 @@ struct parse_function + + struct parse_if_body + { +- struct list *then_instrs; +- struct list *else_instrs; ++ struct list *then_block; ++ struct list *else_block; + }; + + enum parse_assign_op +@@ -164,7 +164,7 @@ static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct + src_comp_type = hlsl_type_get_component_type(ctx, src, k); + dst_comp_type = hlsl_type_get_component_type(ctx, dst, k); + +- if ((src_comp_type->type != HLSL_CLASS_SCALAR || dst_comp_type->type != HLSL_CLASS_SCALAR) ++ if ((src_comp_type->class != HLSL_CLASS_SCALAR || dst_comp_type->class != HLSL_CLASS_SCALAR) + && !hlsl_types_are_equal(src_comp_type, dst_comp_type)) + return false; + } +@@ -196,9 +196,9 @@ static bool type_contains_only_numerics(struct hlsl_type *type) + { + unsigned int i; + +- if (type->type == HLSL_CLASS_ARRAY) ++ if (type->class == HLSL_CLASS_ARRAY) + return type_contains_only_numerics(type->e.array.type); +- if (type->type == HLSL_CLASS_STRUCT) ++ if (type->class == HLSL_CLASS_STRUCT) + { + for (i = 0; i < type->e.record.field_count; ++i) + { +@@ -207,23 +207,23 @@ static bool type_contains_only_numerics(struct hlsl_type *type) + } + return true; + } +- return type->type <= HLSL_CLASS_LAST_NUMERIC; ++ return type->class <= HLSL_CLASS_LAST_NUMERIC; + } + + static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) + { +- if (src->type <= HLSL_CLASS_LAST_NUMERIC && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) ++ if (src->class <= HLSL_CLASS_LAST_NUMERIC && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) + return true; + +- if (src->type == HLSL_CLASS_MATRIX && dst->type == HLSL_CLASS_MATRIX ++ if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX + && src->dimx >= dst->dimx && src->dimy >= dst->dimy) + return true; + +- if ((src->type == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) ++ if ((src->class == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) + && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) + return false; + +- if ((dst->type == HLSL_CLASS_MATRIX && dst->dimy > 1) ++ if ((dst->class == HLSL_CLASS_MATRIX && dst->dimy > 1) + && hlsl_type_component_count(src) != 
hlsl_type_component_count(dst)) + return false; + +@@ -232,10 +232,10 @@ static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ + + static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) + { +- if ((src->type <= HLSL_CLASS_LAST_NUMERIC) != (dst->type <= HLSL_CLASS_LAST_NUMERIC)) ++ if ((src->class <= HLSL_CLASS_LAST_NUMERIC) != (dst->class <= HLSL_CLASS_LAST_NUMERIC)) + return false; + +- if (src->type <= HLSL_CLASS_LAST_NUMERIC) ++ if (src->class <= HLSL_CLASS_LAST_NUMERIC) + { + /* Scalar vars can be converted to any other numeric data type */ + if (src->dimx == 1 && src->dimy == 1) +@@ -244,21 +244,21 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ + if (dst->dimx == 1 && dst->dimy == 1) + return true; + +- if (src->type == HLSL_CLASS_MATRIX || dst->type == HLSL_CLASS_MATRIX) ++ if (src->class == HLSL_CLASS_MATRIX || dst->class == HLSL_CLASS_MATRIX) + { +- if (src->type == HLSL_CLASS_MATRIX && dst->type == HLSL_CLASS_MATRIX) ++ if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX) + return src->dimx >= dst->dimx && src->dimy >= dst->dimy; + + /* Matrix-vector conversion is apparently allowed if they have + * the same components count, or if the matrix is 1xN or Nx1 + * and we are reducing the component count */ +- if (src->type == HLSL_CLASS_VECTOR || dst->type == HLSL_CLASS_VECTOR) ++ if (src->class == HLSL_CLASS_VECTOR || dst->class == HLSL_CLASS_VECTOR) + { + if (hlsl_type_component_count(src) == hlsl_type_component_count(dst)) + return true; + +- if ((src->type == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) && +- (dst->type == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) ++ if ((src->class == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) && ++ (dst->class == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) + return hlsl_type_component_count(src) >= hlsl_type_component_count(dst); + } + +@@ -273,19 +273,19 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ + return hlsl_types_are_componentwise_equal(ctx, src, dst); + } + +-static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, ++static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, + unsigned int comp, const struct vkd3d_shader_location *loc); + + static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) + { + struct hlsl_type *src_type = node->data_type; +- struct hlsl_ir_expr *cast; ++ struct hlsl_ir_node *cast; + + if (hlsl_types_are_equal(src_type, dst_type)) + return node; + +- if (src_type->type > HLSL_CLASS_VECTOR || dst_type->type > HLSL_CLASS_VECTOR) ++ if (src_type->class > HLSL_CLASS_VECTOR || dst_type->class > HLSL_CLASS_VECTOR) + { + unsigned int src_comp_count = hlsl_type_component_count(src_type); + unsigned int dst_comp_count = hlsl_type_component_count(dst_type); +@@ -295,9 +295,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_var *var; + unsigned int dst_idx; + +- broadcast = src_type->type <= HLSL_CLASS_LAST_NUMERIC && src_type->dimx == 1 && src_type->dimy == 1; ++ broadcast = src_type->class <= HLSL_CLASS_LAST_NUMERIC && src_type->dimx == 1 && src_type->dimy == 1; + matrix_cast = !broadcast && dst_comp_count 
!= src_comp_count +- && src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX; ++ && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; + assert(src_comp_count >= dst_comp_count || broadcast); + if (matrix_cast) + { +@@ -311,8 +311,8 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, + + for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx) + { ++ struct hlsl_ir_node *component_load; + struct hlsl_type *dst_comp_type; +- struct hlsl_ir_store *store; + struct hlsl_block block; + unsigned int src_idx; + +@@ -333,19 +333,19 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, + + dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); + +- if (!(load = add_load_component(ctx, instrs, node, src_idx, loc))) ++ if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) + return NULL; + +- if (!(cast = hlsl_new_cast(ctx, &load->node, dst_comp_type, loc))) ++ if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) + return NULL; +- list_add_tail(instrs, &cast->node.entry); ++ list_add_tail(instrs, &cast->entry); + +- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, &cast->node))) ++ if (!hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, cast)) + return NULL; + list_move_tail(instrs, &block.instrs); + } + +- if (!(load = hlsl_new_var_load(ctx, var, *loc))) ++ if (!(load = hlsl_new_var_load(ctx, var, loc))) + return NULL; + list_add_tail(instrs, &load->node.entry); + +@@ -355,8 +355,8 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, + { + if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) + return NULL; +- list_add_tail(instrs, &cast->node.entry); +- return &cast->node; ++ list_add_tail(instrs, &cast->entry); ++ return cast; + } + } + +@@ -384,19 +384,20 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct + + if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy) + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", +- src_type->type == HLSL_CLASS_VECTOR ? "vector" : "matrix"); ++ src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix"); + + return add_cast(ctx, instrs, node, dst_type, loc); + } + +-static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, const struct vkd3d_shader_location loc) ++static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, ++ const struct vkd3d_shader_location *loc) + { + if (modifiers & mod) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_modifiers_to_string(ctx, mod))) +- hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifier '%s' was already specified.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return modifiers; +@@ -406,26 +407,27 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, con + + static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) + { +- struct hlsl_ir_node *condition, *not; +- struct hlsl_ir_jump *jump; +- struct hlsl_ir_if *iff; ++ struct hlsl_ir_node *condition, *not, *iff, *jump; ++ struct hlsl_block then_block; + + /* E.g. "for (i = 0; ; ++i)". 
*/ + if (list_empty(cond_list)) + return true; + + condition = node_from_list(cond_list); +- if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, condition->loc))) ++ if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) + return false; + list_add_tail(cond_list, ¬->entry); + +- if (!(iff = hlsl_new_if(ctx, not, condition->loc))) ++ hlsl_block_init(&then_block); ++ ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) + return false; +- list_add_tail(cond_list, &iff->node.entry); ++ hlsl_block_add_instr(&then_block, jump); + +- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, condition->loc))) ++ if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) + return false; +- list_add_head(&iff->then_instrs.instrs, &jump->node.entry); ++ list_add_tail(cond_list, &iff->entry); + return true; + } + +@@ -436,46 +438,87 @@ enum loop_type + LOOP_DO_WHILE + }; + +-static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, struct list *init, struct list *cond, +- struct list *iter, struct list *body, struct vkd3d_shader_location loc) ++static bool attribute_list_has_duplicates(const struct parse_attribute_list *attrs) + { +- struct list *list = NULL; +- struct hlsl_ir_loop *loop = NULL; +- struct hlsl_ir_if *cond_jump = NULL; ++ unsigned int i, j; + +- if (!(list = make_empty_list(ctx))) +- goto oom; ++ for (i = 0; i < attrs->count; ++i) ++ { ++ for (j = i + 1; j < attrs->count; ++j) ++ { ++ if (!strcmp(attrs->attrs[i]->name, attrs->attrs[j]->name)) ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct list *init, struct list *cond, ++ struct list *iter, struct list *body, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_block body_block; ++ struct hlsl_ir_node *loop; ++ unsigned int i; ++ ++ if (attribute_list_has_duplicates(attributes)) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); + +- if (init) +- list_move_head(list, init); ++ /* Ignore unroll(0) attribute, and any invalid attribute. 
*/ ++ for (i = 0; i < attributes->count; ++i) ++ { ++ const struct hlsl_attribute *attr = attributes->attrs[i]; ++ if (!strcmp(attr->name, "unroll")) ++ { ++ if (attr->args_count) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); ++ } ++ else ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented.\n"); ++ } ++ } ++ else if (!strcmp(attr->name, "loop") ++ || !strcmp(attr->name, "fastopt") ++ || !strcmp(attr->name, "allow_uav_condition")) ++ { ++ hlsl_fixme(ctx, loc, "Unhandled attribute %s.", attr->name); ++ } ++ else ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unrecognized attribute %s.", attr->name); ++ } ++ } + +- if (!(loop = hlsl_new_loop(ctx, loc))) ++ if (!init && !(init = make_empty_list(ctx))) + goto oom; +- list_add_tail(list, &loop->node.entry); + + if (!append_conditional_break(ctx, cond)) + goto oom; + ++ hlsl_block_init(&body_block); ++ + if (type != LOOP_DO_WHILE) +- list_move_tail(&loop->body.instrs, cond); ++ list_move_tail(&body_block.instrs, cond); + +- list_move_tail(&loop->body.instrs, body); ++ list_move_tail(&body_block.instrs, body); + + if (iter) +- list_move_tail(&loop->body.instrs, iter); ++ list_move_tail(&body_block.instrs, iter); + + if (type == LOOP_DO_WHILE) +- list_move_tail(&loop->body.instrs, cond); ++ list_move_tail(&body_block.instrs, cond); ++ ++ if (!(loop = hlsl_new_loop(ctx, &body_block, loc))) ++ goto oom; ++ list_add_tail(init, &loop->entry); + +- vkd3d_free(init); + vkd3d_free(cond); + vkd3d_free(body); +- return list; ++ return init; + + oom: +- vkd3d_free(loop); +- vkd3d_free(cond_jump); +- vkd3d_free(list); + destroy_instr_list(init); + destroy_instr_list(cond); + destroy_instr_list(iter); +@@ -500,14 +543,14 @@ static void free_parse_initializer(struct parse_initializer *initializer) + vkd3d_free(initializer->args); + } + +-static struct hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_node *value, const char *swizzle, ++static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_node *value, const char *swizzle, + struct vkd3d_shader_location *loc) + { + unsigned int len = strlen(swizzle), component = 0; + unsigned int i, set, swiz = 0; + bool valid; + +- if (value->data_type->type == HLSL_CLASS_MATRIX) ++ if (value->data_type->class == HLSL_CLASS_MATRIX) + { + /* Matrix swizzle */ + bool m_swizzle; +@@ -582,224 +625,102 @@ static struct hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_ + return NULL; + } + +-static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs, +- struct hlsl_ir_node *return_value, struct vkd3d_shader_location loc) ++static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, ++ struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) + { + struct hlsl_type *return_type = ctx->cur_function->return_type; +- struct hlsl_ir_jump *jump; ++ struct hlsl_ir_node *jump; + + if (ctx->cur_function->return_var) + { + if (return_value) + { +- struct hlsl_ir_store *store; ++ struct hlsl_ir_node *store; + +- if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, &loc))) +- return NULL; ++ if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, loc))) ++ return false; + + if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) +- return NULL; +- list_add_after(&return_value->entry, &store->node.entry); ++ 
return false; ++ list_add_after(&return_value->entry, &store->entry); + } + else + { +- hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Non-void functions must return a value."); +- return NULL; ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Non-void functions must return a value."); ++ return false; + } + } + else + { + if (return_value) +- hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); + } + + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) +- return NULL; +- list_add_tail(instrs, &jump->node.entry); +- +- return jump; +-} +- +-static struct hlsl_ir_load *add_load_index(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +- struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) +-{ +- const struct hlsl_deref *src; +- struct hlsl_ir_load *load; +- +- if (var_instr->type == HLSL_IR_LOAD) +- { +- src = &hlsl_ir_load(var_instr)->src; +- } +- else +- { +- struct hlsl_ir_store *store; +- struct hlsl_ir_var *var; +- +- if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) +- return NULL; +- +- if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) +- return NULL; +- list_add_tail(instrs, &store->node.entry); +- +- src = &store->lhs; +- } +- +- if (!(load = hlsl_new_load_index(ctx, src, idx, loc))) +- return NULL; +- list_add_tail(instrs, &load->node.entry); ++ return false; ++ list_add_tail(instrs, &jump->entry); + +- return load; ++ return true; + } + +-static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, ++static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, + unsigned int comp, const struct vkd3d_shader_location *loc) + { +- const struct hlsl_deref *src; +- struct hlsl_ir_load *load; ++ struct hlsl_ir_node *load, *store; + struct hlsl_block block; ++ struct hlsl_ir_var *var; ++ struct hlsl_deref src; + +- if (var_instr->type == HLSL_IR_LOAD) +- { +- src = &hlsl_ir_load(var_instr)->src; +- } +- else +- { +- struct hlsl_ir_store *store; +- struct hlsl_ir_var *var; +- +- if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) +- return NULL; +- +- if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) +- return NULL; +- list_add_tail(instrs, &store->node.entry); ++ if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) ++ return NULL; + +- src = &store->lhs; +- } ++ if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) ++ return NULL; ++ list_add_tail(instrs, &store->entry); + +- if (!(load = hlsl_new_load_component(ctx, &block, src, comp, loc))) ++ hlsl_init_simple_deref_from_var(&src, var); ++ if (!(load = hlsl_new_load_component(ctx, &block, &src, comp, loc))) + return NULL; + list_move_tail(instrs, &block.instrs); + + return load; + } + +-static bool add_record_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, +- unsigned int idx, const struct vkd3d_shader_location loc) ++static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, ++ unsigned int idx, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_constant *c; ++ struct hlsl_ir_node *index, *c; + + assert(idx < record->data_type->e.record.field_count); + +- if (!(c = 
hlsl_new_uint_constant(ctx, idx, &loc))) +- return false; +- list_add_tail(instrs, &c->node.entry); +- +- return !!add_load_index(ctx, instrs, record, &c->node, &loc); +-} +- +-static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, +- enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, +- const struct vkd3d_shader_location *loc); +- +-static bool add_matrix_index(struct hlsl_ctx *ctx, struct list *instrs, +- struct hlsl_ir_node *matrix, struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_type *mat_type = matrix->data_type, *ret_type; +- struct hlsl_deref var_deref; +- struct hlsl_ir_load *load; +- struct hlsl_ir_var *var; +- unsigned int i; +- +- if (hlsl_type_is_row_major(mat_type)) +- return add_load_index(ctx, instrs, matrix, index, loc); +- +- ret_type = hlsl_get_vector_type(ctx, mat_type->base_type, mat_type->dimx); +- +- if (!(var = hlsl_new_synthetic_var(ctx, "index", ret_type, loc))) ++ if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) + return false; +- hlsl_init_simple_deref_from_var(&var_deref, var); +- +- for (i = 0; i < mat_type->dimx; ++i) +- { +- struct hlsl_ir_load *column, *value; +- struct hlsl_ir_store *store; +- struct hlsl_ir_constant *c; +- struct hlsl_block block; +- +- if (!(c = hlsl_new_uint_constant(ctx, i, loc))) +- return false; +- list_add_tail(instrs, &c->node.entry); +- +- if (!(column = add_load_index(ctx, instrs, matrix, &c->node, loc))) +- return false; +- +- if (!(value = add_load_index(ctx, instrs, &column->node, index, loc))) +- return false; ++ list_add_tail(instrs, &c->entry); + +- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, i, &value->node))) +- return false; +- list_move_tail(instrs, &block.instrs); +- } +- +- if (!(load = hlsl_new_var_load(ctx, var, *loc))) ++ if (!(index = hlsl_new_index(ctx, record, c, loc))) + return false; +- list_add_tail(instrs, &load->node.entry); ++ list_add_tail(instrs, &index->entry); + + return true; + } + +-static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct list *instrs, +- struct hlsl_ir_node *index, unsigned int dim_count, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_load *coords_load; +- struct hlsl_deref coords_deref; +- struct hlsl_ir_constant *zero; +- struct hlsl_ir_store *store; +- struct hlsl_ir_var *coords; +- +- if (!(coords = hlsl_new_synthetic_var(ctx, "coords", +- hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) +- return NULL; +- +- hlsl_init_simple_deref_from_var(&coords_deref, coords); +- if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, index, (1u << dim_count) - 1, loc))) +- return NULL; +- list_add_tail(instrs, &store->node.entry); +- +- if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) +- return NULL; +- list_add_tail(instrs, &zero->node.entry); +- +- if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, &zero->node, 1u << dim_count, loc))) +- return NULL; +- list_add_tail(instrs, &store->node.entry); +- +- if (!(coords_load = hlsl_new_var_load(ctx, coords, *loc))) +- return NULL; +- list_add_tail(instrs, &coords_load->node.entry); +- +- return &coords_load->node; +-} ++static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, ++ enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, ++ const struct vkd3d_shader_location *loc); + +-static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, 
++static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, + struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) + { + const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; +- struct hlsl_ir_expr *cast; ++ struct hlsl_ir_node *return_index, *cast; + +- if (expr_type->type == HLSL_CLASS_OBJECT ++ if (expr_type->class == HLSL_CLASS_OBJECT + && (expr_type->base_type == HLSL_TYPE_TEXTURE || expr_type->base_type == HLSL_TYPE_UAV) + && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { +- struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; + unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim); +- /* Only HLSL_IR_LOAD can return an object. */ +- struct hlsl_ir_load *object_load = hlsl_ir_load(array); +- struct hlsl_ir_resource_load *resource_load; + +- if (index_type->type > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) ++ if (index_type->class > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) + { + struct vkd3d_string_buffer *string; + +@@ -814,20 +735,14 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls + hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) + return false; + +- if (!(index = add_zero_mipmap_level(ctx, instrs, index, dim_count, loc))) ++ if (!(return_index = hlsl_new_index(ctx, array, index, loc))) + return false; ++ list_add_tail(instrs, &return_index->entry); + +- load_params.format = expr_type->e.resource_format; +- load_params.resource = object_load->src; +- load_params.coords = index; +- +- if (!(resource_load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- list_add_tail(instrs, &resource_load->node.entry); + return true; + } + +- if (index_type->type != HLSL_CLASS_SCALAR) ++ if (index_type->class != HLSL_CLASS_SCALAR) + { + hlsl_error(ctx, &index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Array index is not scalar."); + return false; +@@ -835,23 +750,21 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls + + if (!(cast = hlsl_new_cast(ctx, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &index->loc))) + return false; +- list_add_tail(instrs, &cast->node.entry); +- index = &cast->node; +- +- if (expr_type->type == HLSL_CLASS_MATRIX) +- return add_matrix_index(ctx, instrs, array, index, loc); ++ list_add_tail(instrs, &cast->entry); ++ index = cast; + +- if (expr_type->type != HLSL_CLASS_ARRAY && expr_type->type != HLSL_CLASS_VECTOR) ++ if (expr_type->class != HLSL_CLASS_ARRAY && expr_type->class != HLSL_CLASS_VECTOR && expr_type->class != HLSL_CLASS_MATRIX) + { +- if (expr_type->type == HLSL_CLASS_SCALAR) ++ if (expr_type->class == HLSL_CLASS_SCALAR) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, "Scalar expressions cannot be array-indexed."); + else + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, "Expression cannot be array-indexed."); + return false; + } + +- if (!add_load_index(ctx, instrs, array, index, loc)) ++ if (!(return_index = hlsl_new_index(ctx, array, index, loc))) + return false; ++ list_add_tail(instrs, &return_index->entry); + + return true; + } +@@ -877,12 +790,12 @@ static struct hlsl_type *apply_type_modifiers(struct hlsl_ctx *ctx, struct hlsl_ + + if (!(*modifiers & HLSL_MODIFIERS_MAJORITY_MASK) + && !(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK) +- && type->type == HLSL_CLASS_MATRIX) ++ && type->class == HLSL_CLASS_MATRIX) + { + if (!(default_majority = 
ctx->matrix_majority) && force_majority) + default_majority = HLSL_MODIFIER_COLUMN_MAJOR; + } +- else if (type->type != HLSL_CLASS_MATRIX && (*modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) ++ else if (type->class != HLSL_CLASS_MATRIX && (*modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "'row_major' and 'column_major' modifiers are only allowed for matrices."); +@@ -923,7 +836,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + struct parse_variable_def *v, *v_next; + size_t i = 0; + +- if (type->type == HLSL_CLASS_MATRIX) ++ if (type->class == HLSL_CLASS_MATRIX) + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + + memset(fields, 0, sizeof(*fields)); +@@ -939,7 +852,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + + field->type = type; + +- if (shader_is_sm_5_1(ctx) && type->type == HLSL_CLASS_OBJECT) ++ if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) + { + for (k = 0; k < v->arrays.count; ++k) + unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); +@@ -983,6 +896,9 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Illegal initializer on a struct field."); + free_parse_initializer(&v->initializer); + } ++ if (v->reg_reservation.offset_type) ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "packoffset() is not allowed inside struct definitions."); + vkd3d_free(v); + } + vkd3d_free(defs); +@@ -1052,18 +968,23 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, + } + + static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, +- struct parse_parameter *param, const struct vkd3d_shader_location loc) ++ struct parse_parameter *param, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_var *var; + +- if (param->type->type == HLSL_CLASS_MATRIX) ++ if (param->type->class == HLSL_CLASS_MATRIX) + assert(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + + if ((param->modifiers & HLSL_STORAGE_OUT) && (param->modifiers & HLSL_STORAGE_UNIFORM)) +- hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Parameter '%s' is declared as both \"out\" and \"uniform\".", param->name); + +- if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, ¶m->semantic, param->modifiers, ¶m->reg_reservation))) ++ if (param->reg_reservation.offset_type) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "packoffset() is not allowed on function parameters."); ++ ++ if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, ¶m->semantic, param->modifiers, ++ ¶m->reg_reservation))) + return false; + var->is_param = 1; + +@@ -1084,12 +1005,61 @@ static struct hlsl_reg_reservation parse_reg_reservation(const char *reg_string) + { + struct hlsl_reg_reservation reservation = {0}; + +- if (!sscanf(reg_string + 1, "%u", &reservation.index)) ++ if (!sscanf(reg_string + 1, "%u", &reservation.reg_index)) + { + FIXME("Unsupported register reservation syntax.\n"); + return reservation; + } +- reservation.type = reg_string[0]; ++ reservation.reg_type = ascii_tolower(reg_string[0]); ++ return reservation; ++} ++ ++static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const char *reg_string, ++ const char *swizzle, 
const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_reg_reservation reservation = {0}; ++ char *endptr; ++ ++ if (ctx->profile->major_version < 4) ++ return reservation; ++ ++ reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); ++ if (*endptr) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid packoffset() syntax."); ++ return reservation; ++ } ++ ++ reservation.offset_type = ascii_tolower(reg_string[0]); ++ if (reservation.offset_type != 'c') ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Only 'c' registers are allowed in packoffset()."); ++ return reservation; ++ } ++ ++ reservation.offset_index *= 4; ++ ++ if (swizzle) ++ { ++ if (strlen(swizzle) != 1) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid packoffset() component \"%s\".", swizzle); ++ ++ if (swizzle[0] == 'x' || swizzle[0] == 'r') ++ reservation.offset_index += 0; ++ else if (swizzle[0] == 'y' || swizzle[0] == 'g') ++ reservation.offset_index += 1; ++ else if (swizzle[0] == 'z' || swizzle[0] == 'b') ++ reservation.offset_index += 2; ++ else if (swizzle[0] == 'w' || swizzle[0] == 'a') ++ reservation.offset_index += 3; ++ else ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid packoffset() component \"%s\".", swizzle); ++ } ++ + return reservation; + } + +@@ -1122,53 +1092,32 @@ static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) + return list; + } + +-static unsigned int evaluate_static_expression(struct hlsl_ir_node *node) ++static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ const struct vkd3d_shader_location *loc) + { +- if (node->data_type->type != HLSL_CLASS_SCALAR) +- return 0; +- +- switch (node->type) +- { +- case HLSL_IR_CONSTANT: +- { +- struct hlsl_ir_constant *constant = hlsl_ir_constant(node); +- const union hlsl_constant_value *value = &constant->value[0]; ++ struct hlsl_ir_constant *constant; ++ struct hlsl_ir_node *node; ++ unsigned int ret = 0; + +- switch (constant->node.data_type->base_type) +- { +- case HLSL_TYPE_UINT: +- return value->u; +- case HLSL_TYPE_INT: +- return value->i; +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- return value->f; +- case HLSL_TYPE_DOUBLE: +- return value->d; +- case HLSL_TYPE_BOOL: +- return !!value->u; +- default: +- vkd3d_unreachable(); +- } +- } ++ if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) ++ return 0; + +- case HLSL_IR_EXPR: +- case HLSL_IR_LOAD: +- case HLSL_IR_RESOURCE_LOAD: +- case HLSL_IR_SWIZZLE: +- FIXME("Unhandled type %s.\n", hlsl_node_type_to_string(node->type)); +- return 0; ++ while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL)); + +- case HLSL_IR_CALL: +- case HLSL_IR_IF: +- case HLSL_IR_JUMP: +- case HLSL_IR_LOOP: +- case HLSL_IR_RESOURCE_STORE: +- case HLSL_IR_STORE: +- vkd3d_unreachable(); ++ node = node_from_list(&block->instrs); ++ if (node->type == HLSL_IR_CONSTANT) ++ { ++ constant = hlsl_ir_constant(node); ++ ret = constant->value.u[0].u; ++ } ++ else ++ { ++ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Failed to evaluate constant expression %d.", node->type); + } + +- vkd3d_unreachable(); ++ return ret; + } + + static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) +@@ -1180,20 +1129,20 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct 
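/* A worked example for parse_packoffset() above (illustrative, not part of the
 * diff): a constant buffer member declared as
 *     float f : packoffset(c2.y);
 * reaches the parser as reg_string "c2" and swizzle "y", so offset_type becomes
 * 'c' and offset_index becomes 2 * 4 + 1 = 9. That is, offsets are stored in
 * register components, four per 'c' register, with the x/r, y/g, z/b and w/a
 * components mapping to +0, +1, +2 and +3 respectively; profiles older than
 * SM4 return an empty reservation. */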
hlsl_type *t + if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) + return true; + +- if (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_VECTOR) ++ if (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_VECTOR) + return true; + +- if (t1->type == HLSL_CLASS_MATRIX || t2->type == HLSL_CLASS_MATRIX) ++ if (t1->class == HLSL_CLASS_MATRIX || t2->class == HLSL_CLASS_MATRIX) + { + /* Matrix-vector conversion is apparently allowed if either they have the same components + count or the matrix is nx1 or 1xn */ +- if (t1->type == HLSL_CLASS_VECTOR || t2->type == HLSL_CLASS_VECTOR) ++ if (t1->class == HLSL_CLASS_VECTOR || t2->class == HLSL_CLASS_VECTOR) + { + if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) + return true; + +- return (t1->type == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) +- || (t2->type == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); ++ return (t1->class == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) ++ || (t2->class == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); + } + + /* Both matrices */ +@@ -1226,7 +1175,7 @@ static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hl + static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct hlsl_type *t2, + const struct vkd3d_shader_location *loc, enum hlsl_type_class *type, unsigned int *dimx, unsigned int *dimy) + { +- if (t1->type > HLSL_CLASS_LAST_NUMERIC) ++ if (t1->class > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + +@@ -1237,7 +1186,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct + return false; + } + +- if (t2->type > HLSL_CLASS_LAST_NUMERIC) ++ if (t2->class > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + +@@ -1264,17 +1213,17 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct + + if (t1->dimx == 1 && t1->dimy == 1) + { +- *type = t2->type; ++ *type = t2->class; + *dimx = t2->dimx; + *dimy = t2->dimy; + } + else if (t2->dimx == 1 && t2->dimy == 1) + { +- *type = t1->type; ++ *type = t1->class; + *dimx = t1->dimx; + *dimy = t1->dimy; + } +- else if (t1->type == HLSL_CLASS_MATRIX && t2->type == HLSL_CLASS_MATRIX) ++ else if (t1->class == HLSL_CLASS_MATRIX && t2->class == HLSL_CLASS_MATRIX) + { + *type = HLSL_CLASS_MATRIX; + *dimx = min(t1->dimx, t2->dimx); +@@ -1284,13 +1233,13 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct + { + if (t1->dimx * t1->dimy <= t2->dimx * t2->dimy) + { +- *type = t1->type; ++ *type = t1->class; + *dimx = t1->dimx; + *dimy = t1->dimy; + } + else + { +- *type = t2->type; ++ *type = t2->class; + *dimx = t2->dimx; + *dimy = t2->dimy; + } +@@ -1306,55 +1255,50 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *expr; + unsigned int i; + +- if (type->type == HLSL_CLASS_MATRIX) ++ if (type->class == HLSL_CLASS_MATRIX) + { +- struct hlsl_type *vector_type; ++ struct hlsl_type *scalar_type; ++ struct hlsl_ir_load *var_load; + struct hlsl_deref var_deref; +- struct hlsl_ir_load *load; ++ struct hlsl_ir_node *load; + struct hlsl_ir_var *var; + +- vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); ++ scalar_type = hlsl_get_scalar_type(ctx, type->base_type); + + if (!(var = hlsl_new_synthetic_var(ctx, "split_op", type, loc))) + return NULL; + hlsl_init_simple_deref_from_var(&var_deref, var); + +- for (i = 0; i < 
hlsl_type_major_size(type); ++i) ++ for (i = 0; i < type->dimy * type->dimx; ++i) + { +- struct hlsl_ir_node *value, *vector_operands[HLSL_MAX_OPERANDS] = { NULL }; +- struct hlsl_ir_store *store; +- struct hlsl_ir_constant *c; ++ struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; ++ struct hlsl_block block; + unsigned int j; + +- if (!(c = hlsl_new_uint_constant(ctx, i, loc))) +- return NULL; +- list_add_tail(instrs, &c->node.entry); +- + for (j = 0; j < HLSL_MAX_OPERANDS; j++) + { + if (operands[j]) + { +- struct hlsl_ir_load *load; +- +- if (!(load = add_load_index(ctx, instrs, operands[j], &c->node, loc))) ++ if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) + return NULL; +- vector_operands[j] = &load->node; ++ ++ cell_operands[j] = load; + } + } + +- if (!(value = add_expr(ctx, instrs, op, vector_operands, vector_type, loc))) ++ if (!(value = add_expr(ctx, instrs, op, cell_operands, scalar_type, loc))) + return NULL; + +- if (!(store = hlsl_new_store_index(ctx, &var_deref, &c->node, value, 0, loc))) ++ if (!hlsl_new_store_component(ctx, &block, &var_deref, i, value)) + return NULL; +- list_add_tail(instrs, &store->node.entry); ++ list_move_tail(instrs, &block.instrs); + } + +- if (!(load = hlsl_new_var_load(ctx, var, *loc))) ++ if (!(var_load = hlsl_new_var_load(ctx, var, loc))) + return NULL; +- list_add_tail(instrs, &load->node.entry); ++ list_add_tail(instrs, &var_load->node.entry); + +- return &load->node; ++ return &var_load->node; + } + + if (!(expr = hlsl_new_expr(ctx, op, operands, type, loc))) +@@ -1407,7 +1351,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *bool_type; + +- bool_type = hlsl_get_numeric_type(ctx, arg->data_type->type, HLSL_TYPE_BOOL, ++ bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, + arg->data_type->dimx, arg->data_type->dimy); + + if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) +@@ -1416,20 +1360,27 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct + return add_expr(ctx, instrs, op, args, bool_type, loc); + } + +-static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, +- enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, +- const struct vkd3d_shader_location *loc) ++static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, ++ const struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) + { +- struct hlsl_type *common_type; + enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); + enum hlsl_type_class type; + unsigned int dimx, dimy; +- struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + + if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) + return NULL; + +- common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); ++ return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); ++} ++ ++static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, ++ enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, ++ const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_type *common_type; ++ ++ common_type = get_common_numeric_type(ctx, arg1, arg2, loc); + + if (!(args[0] = 
add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + return NULL; +@@ -1441,13 +1392,13 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str + } + + static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, +- enum hlsl_ir_expr_op op, struct vkd3d_shader_location loc) ++ enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); + + list_move_tail(list1, list2); + vkd3d_free(list2); +- add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, &loc); ++ add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, loc); + return list1; + } + +@@ -1499,13 +1450,13 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str + } + + static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, +- enum hlsl_ir_expr_op op, const struct vkd3d_shader_location loc) ++ enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); + + list_move_tail(list1, list2); + vkd3d_free(list2); +- add_binary_comparison_expr(ctx, list1, op, arg1, arg2, &loc); ++ add_binary_comparison_expr(ctx, list1, op, arg1, arg2, loc); + return list1; + } + +@@ -1596,7 +1547,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis + enum hlsl_ir_expr_op op; + unsigned dim; + +- if (arg1->data_type->type == HLSL_CLASS_MATRIX) ++ if (arg1->data_type->class == HLSL_CLASS_MATRIX) + { + struct vkd3d_string_buffer *string; + +@@ -1607,7 +1558,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis + return NULL; + } + +- if (arg2->data_type->type == HLSL_CLASS_MATRIX) ++ if (arg2->data_type->class == HLSL_CLASS_MATRIX) + { + struct vkd3d_string_buffer *string; + +@@ -1618,9 +1569,9 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis + return NULL; + } + +- if (arg1->data_type->type == HLSL_CLASS_SCALAR) ++ if (arg1->data_type->class == HLSL_CLASS_SCALAR) + dim = arg2->data_type->dimx; +- else if (arg2->data_type->type == HLSL_CLASS_SCALAR) ++ else if (arg2->data_type->class == HLSL_CLASS_SCALAR) + dim = arg1->data_type->dimx; + else + dim = min(arg1->data_type->dimx, arg2->data_type->dimx); +@@ -1702,7 +1653,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) + { + struct hlsl_type *lhs_type = lhs->data_type; +- struct hlsl_ir_expr *copy; ++ struct hlsl_ir_node *copy; + unsigned int writemask = 0; + + if (assign_op == ASSIGN_OP_SUB) +@@ -1720,13 +1671,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + return NULL; + } + +- if (lhs_type->type <= HLSL_CLASS_LAST_NUMERIC) ++ if (lhs_type->class <= HLSL_CLASS_LAST_NUMERIC) + writemask = (1 << lhs_type->dimx) - 1; + + if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) + return NULL; + +- while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_RESOURCE_LOAD) ++ while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) + { + if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) + { +@@ -1735,10 +1686,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + } + else if (lhs->type == HLSL_IR_SWIZZLE) + { +- struct hlsl_ir_swizzle *swizzle = 
hlsl_ir_swizzle(lhs), *new_swizzle; ++ struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); + unsigned int width, s = swizzle->swizzle; ++ struct hlsl_ir_node *new_swizzle; + +- if (lhs->data_type->type == HLSL_CLASS_MATRIX) ++ if (lhs->data_type->class == HLSL_CLASS_MATRIX) + hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); + + if (!invert_swizzle(&s, &writemask, &width)) +@@ -1751,10 +1703,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + { + return NULL; + } +- list_add_tail(instrs, &new_swizzle->node.entry); ++ list_add_tail(instrs, &new_swizzle->entry); + + lhs = swizzle->val.node; +- rhs = &new_swizzle->node; ++ rhs = new_swizzle; + } + else + { +@@ -1763,18 +1715,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + } + } + +- if (lhs->type == HLSL_IR_RESOURCE_LOAD) ++ if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_resource_access(hlsl_ir_index(lhs))) + { +- struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(lhs); +- struct hlsl_ir_resource_store *store; ++ struct hlsl_ir_node *coords = hlsl_ir_index(lhs)->idx.node; ++ struct hlsl_deref resource_deref; + struct hlsl_type *resource_type; +- struct hlsl_ir_swizzle *coords; ++ struct hlsl_ir_node *store; + unsigned int dim_count; + +- /* Such an lvalue was produced by an index expression. */ +- assert(load->load_type == HLSL_RESOURCE_LOAD); +- resource_type = hlsl_deref_get_type(ctx, &load->resource); +- assert(resource_type->type == HLSL_CLASS_OBJECT); ++ if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, hlsl_ir_index(lhs)->val.node)) ++ return NULL; ++ ++ resource_type = hlsl_deref_get_type(ctx, &resource_deref); ++ assert(resource_type->class == HLSL_CLASS_OBJECT); + assert(resource_type->base_type == HLSL_TYPE_TEXTURE || resource_type->base_type == HLSL_TYPE_UAV); + + if (resource_type->base_type != HLSL_TYPE_UAV) +@@ -1787,25 +1740,70 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, + "Resource store expressions must write to all components."); + +- /* Remove the (implicit) mipmap level from the load expression. 
*/ +- assert(load->coords.node->data_type->type == HLSL_CLASS_VECTOR); +- assert(load->coords.node->data_type->base_type == HLSL_TYPE_UINT); +- assert(load->coords.node->data_type->dimx == dim_count + 1); +- if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dim_count, load->coords.node, &lhs->loc))) +- return NULL; +- list_add_tail(instrs, &coords->node.entry); ++ assert(coords->data_type->class == HLSL_CLASS_VECTOR); ++ assert(coords->data_type->base_type == HLSL_TYPE_UINT); ++ assert(coords->data_type->dimx == dim_count); + +- if (!(store = hlsl_new_resource_store(ctx, &load->resource, &coords->node, rhs, &lhs->loc))) ++ if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) ++ { ++ hlsl_cleanup_deref(&resource_deref); + return NULL; +- list_add_tail(instrs, &store->node.entry); ++ } ++ list_add_tail(instrs, &store->entry); ++ hlsl_cleanup_deref(&resource_deref); ++ } ++ else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) ++ { ++ struct hlsl_ir_index *row = hlsl_ir_index(lhs); ++ struct hlsl_ir_node *mat = row->val.node; ++ unsigned int i, k = 0; ++ ++ for (i = 0; i < mat->data_type->dimx; ++i) ++ { ++ struct hlsl_ir_node *cell, *load, *store, *c; ++ struct hlsl_deref deref; ++ ++ if (!(writemask & (1 << i))) ++ continue; ++ ++ if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) ++ return NULL; ++ list_add_tail(instrs, &c->entry); ++ ++ if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) ++ return NULL; ++ list_add_tail(instrs, &cell->entry); ++ ++ if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) ++ return NULL; ++ ++ if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) ++ return NULL; ++ ++ if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load, 0, &rhs->loc))) ++ { ++ hlsl_cleanup_deref(&deref); ++ return NULL; ++ } ++ list_add_tail(instrs, &store->entry); ++ hlsl_cleanup_deref(&deref); ++ } + } + else + { +- struct hlsl_ir_store *store; ++ struct hlsl_ir_node *store; ++ struct hlsl_deref deref; ++ ++ if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) ++ return NULL; + +- if (!(store = hlsl_new_store_index(ctx, &hlsl_ir_load(lhs)->src, NULL, rhs, writemask, &rhs->loc))) ++ if (!(store = hlsl_new_store_index(ctx, &deref, NULL, rhs, writemask, &rhs->loc))) ++ { ++ hlsl_cleanup_deref(&deref); + return NULL; +- list_add_tail(instrs, &store->node.entry); ++ } ++ list_add_tail(instrs, &store->entry); ++ hlsl_cleanup_deref(&deref); + } + + /* Don't use the instruction itself as a source, as this makes structure +@@ -1813,37 +1811,37 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in + * the last instruction in the list, we do need to copy. */ + if (!(copy = hlsl_new_copy(ctx, rhs))) + return NULL; +- list_add_tail(instrs, &copy->node.entry); +- return &copy->node; ++ list_add_tail(instrs, &copy->entry); ++ return copy; + } + + static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, +- struct vkd3d_shader_location loc) ++ const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *lhs = node_from_list(instrs); +- struct hlsl_ir_constant *one; ++ struct hlsl_ir_node *one; + + if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) +- hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, + "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ?
"de" : "in"); + +- if (!(one = hlsl_new_int_constant(ctx, 1, &loc))) ++ if (!(one = hlsl_new_int_constant(ctx, 1, loc))) + return false; +- list_add_tail(instrs, &one->node.entry); ++ list_add_tail(instrs, &one->entry); + +- if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, &one->node)) ++ if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) + return false; + + if (post) + { +- struct hlsl_ir_expr *copy; ++ struct hlsl_ir_node *copy; + + if (!(copy = hlsl_new_copy(ctx, lhs))) + return false; +- list_add_tail(instrs, &copy->node.entry); ++ list_add_tail(instrs, &copy->entry); + + /* Post increment/decrement expressions are considered const. */ +- if (!(copy->node.data_type = hlsl_type_clone(ctx, copy->node.data_type, 0, HLSL_MODIFIER_CONST))) ++ if (!(copy->data_type = hlsl_type_clone(ctx, copy->data_type, 0, HLSL_MODIFIER_CONST))) + return false; + } + +@@ -1861,10 +1859,8 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, + + for (k = 0; k < src_comp_count; ++k) + { ++ struct hlsl_ir_node *conv, *load; + struct hlsl_type *dst_comp_type; +- struct hlsl_ir_store *store; +- struct hlsl_ir_load *load; +- struct hlsl_ir_node *conv; + struct hlsl_block block; + + if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) +@@ -1872,10 +1868,10 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, + + dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); + +- if (!(conv = add_implicit_conversion(ctx, instrs, &load->node, dst_comp_type, &src->loc))) ++ if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) + return; + +- if (!(store = hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv))) ++ if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) + return; + list_move_tail(instrs, &block.instrs); + +@@ -1885,12 +1881,12 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, + + static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_struct) + { +- if (type->type == HLSL_CLASS_OBJECT) ++ if (type->class == HLSL_CLASS_OBJECT) + return !must_be_in_struct; +- if (type->type == HLSL_CLASS_ARRAY) ++ if (type->class == HLSL_CLASS_ARRAY) + return type_has_object_components(type->e.array.type, must_be_in_struct); + +- if (type->type == HLSL_CLASS_STRUCT) ++ if (type->class == HLSL_CLASS_STRUCT) + { + unsigned int i; + +@@ -1905,12 +1901,12 @@ static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_s + + static bool type_has_numeric_components(struct hlsl_type *type) + { +- if (type->type <= HLSL_CLASS_LAST_NUMERIC) ++ if (type->class <= HLSL_CLASS_LAST_NUMERIC) + return true; +- if (type->type == HLSL_CLASS_ARRAY) ++ if (type->class == HLSL_CLASS_ARRAY) + return type_has_numeric_components(type->e.array.type); + +- if (type->type == HLSL_CLASS_STRUCT) ++ if (type->class == HLSL_CLASS_STRUCT) + { + unsigned int i; + +@@ -1934,7 +1930,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + struct hlsl_type *type; + bool local = true; + +- if (basic_type->type == HLSL_CLASS_MATRIX) ++ if (basic_type->class == HLSL_CLASS_MATRIX) + assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + + if (!(statements_list = make_empty_list(ctx))) +@@ -1966,7 +1962,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + + type = basic_type; + +- if
(shader_is_sm_5_1(ctx) && type->type == HLSL_CLASS_OBJECT) ++ if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) + { + for (i = 0; i < v->arrays.count; ++i) + unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); +@@ -2035,7 +2031,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + } + vkd3d_free(v->arrays.sizes); + +- if (!(var = hlsl_new_var(ctx, v->name, type, v->loc, &v->semantic, modifiers, &v->reg_reservation))) ++ if (!(var = hlsl_new_var(ctx, v->name, type, &v->loc, &v->semantic, modifiers, &v->reg_reservation))) + { + free_parse_variable_def(v); + continue; +@@ -2043,6 +2039,13 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + + var->buffer = ctx->cur_buffer; + ++ if (var->buffer == ctx->globals_buffer) ++ { ++ if (var->reg_reservation.offset_type) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "packoffset() is only allowed inside constant buffer declarations."); ++ } ++ + if (ctx->cur_scope == ctx->globals) + { + local = false; +@@ -2148,7 +2151,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + } + else + { +- struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, var->loc); ++ struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); + + assert(v->initializer.args_count == 1); + list_add_tail(v->initializer.instrs, &load->node.entry); +@@ -2164,9 +2167,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + } + else if (var->storage_modifiers & HLSL_STORAGE_STATIC) + { +- struct hlsl_ir_constant *zero; +- struct hlsl_ir_store *store; +- struct hlsl_ir_node *cast; ++ struct hlsl_ir_node *cast, *store, *zero; + + /* Initialize statics to zero by default. 
*/ + +@@ -2181,9 +2182,9 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + vkd3d_free(v); + continue; + } +- list_add_tail(&ctx->static_initializers, &zero->node.entry); ++ list_add_tail(&ctx->static_initializers, &zero->entry); + +- if (!(cast = add_cast(ctx, &ctx->static_initializers, &zero->node, var->data_type, &var->loc))) ++ if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) + { + vkd3d_free(v); + continue; +@@ -2194,7 +2195,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t + vkd3d_free(v); + continue; + } +- list_add_tail(&ctx->static_initializers, &store->node.entry); ++ list_add_tail(&ctx->static_initializers, &store->entry); + } + vkd3d_free(v); + } +@@ -2279,7 +2280,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, + if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) + return arg; + +- type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + return add_implicit_conversion(ctx, params->instrs, arg, type, loc); + } + +@@ -2315,12 +2316,12 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * + + base = expr_common_base_type(base, arg_type->base_type); + +- if (arg_type->type == HLSL_CLASS_VECTOR) ++ if (arg_type->class == HLSL_CLASS_VECTOR) + { + vectors = true; + dimx = min(dimx, arg_type->dimx); + } +- else if (arg_type->type == HLSL_CLASS_MATRIX) ++ else if (arg_type->class == HLSL_CLASS_MATRIX) + { + matrices = true; + dimx = min(dimx, arg_type->dimx); +@@ -2369,7 +2370,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, + if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; + +- type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + + return convert_args(ctx, params, type, loc); + } +@@ -2383,20 +2384,18 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx, + static bool intrinsic_all(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *arg = params->args[0], *mul; +- struct hlsl_ir_constant *one, *zero; +- struct hlsl_ir_load *load; ++ struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load; + unsigned int i, count; + + if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) + return false; +- list_add_tail(params->instrs, &one->node.entry); ++ list_add_tail(params->instrs, &one->entry); + + if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) + return false; +- list_add_tail(params->instrs, &zero->node.entry); ++ list_add_tail(params->instrs, &zero->entry); + +- mul = &one->node; ++ mul = one; + + count = hlsl_type_component_count(arg->data_type); + for (i = 0; i < count; ++i) +@@ -2404,46 +2403,117 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, + if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + return false; + +- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &load->node, mul, loc))) ++ if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) + return false; + } + +- return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, &zero->node, loc); +-} +- +-/* Find the type 
corresponding to the given source type, with the same +- * dimensions but a different base type. */ +-static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, +- const struct hlsl_type *type, enum hlsl_base_type base_type) +-{ +- return hlsl_get_numeric_type(ctx, type->type, base_type, type->dimx, type->dimy); ++ return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); + } + +-static bool intrinsic_asuint(struct hlsl_ctx *ctx, ++static bool intrinsic_any(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; +- struct hlsl_type *data_type; ++ struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; ++ unsigned int i, count; + +- if (params->args_count != 1 && params->args_count != 3) ++ if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) + { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Wrong number of arguments to function 'asuint': expected 1 or 3, but got %u.", params->args_count); ++ hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); + return false; + } + +- if (params->args_count == 3) ++ if (arg->data_type->base_type == HLSL_TYPE_FLOAT) + { +- hlsl_fixme(ctx, loc, "Double-to-integer conversion."); +- return false; +- } ++ if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) ++ return false; ++ list_add_tail(params->instrs, &zero->entry); + +- data_type = params->args[0]->data_type; +- if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) ++ if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) ++ return false; ++ ++ return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); ++ } ++ else if (arg->data_type->base_type == HLSL_TYPE_BOOL) + { +- struct vkd3d_string_buffer *string; ++ if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) ++ return false; ++ list_add_tail(params->instrs, &bfalse->entry); + +- if ((string = hlsl_type_to_string(ctx, data_type))) ++ or = bfalse; ++ ++ count = hlsl_type_component_count(arg->data_type); ++ for (i = 0; i < count; ++i) ++ { ++ if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) ++ return false; ++ ++ if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) ++ return false; ++ } ++ ++ return true; ++ } ++ ++ hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); ++ return false; ++} ++ ++/* Find the type corresponding to the given source type, with the same ++ * dimensions but a different base type. 
*/ ++static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, ++ const struct hlsl_type *type, enum hlsl_base_type base_type) ++{ ++ return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); ++} ++ ++static bool intrinsic_asfloat(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_type *data_type; ++ ++ data_type = params->args[0]->data_type; ++ if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, data_type))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Wrong argument type of asfloat(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", ++ string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ } ++ data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_FLOAT); ++ ++ operands[0] = params->args[0]; ++ return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); ++} ++ ++static bool intrinsic_asuint(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_type *data_type; ++ ++ if (params->args_count != 1 && params->args_count != 3) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to function 'asuint': expected 1 or 3, but got %u.", params->args_count); ++ return false; ++ } ++ ++ if (params->args_count == 3) ++ { ++ hlsl_fixme(ctx, loc, "Double-to-integer conversion."); ++ return false; ++ } ++ ++ data_type = params->args[0]->data_type; ++ if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, data_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of asuint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", + string->buffer); +@@ -2483,7 +2553,7 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, + static bool intrinsic_cross(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_swizzle *arg1_swzl1, *arg1_swzl2, *arg2_swzl1, *arg2_swzl2; ++ struct hlsl_ir_node *arg1_swzl1, *arg1_swzl2, *arg2_swzl1, *arg2_swzl2; + struct hlsl_ir_node *arg1 = params->args[0], *arg2 = params->args[1]; + struct hlsl_ir_node *arg1_cast, *arg2_cast, *mul1_neg, *mul1, *mul2; + struct hlsl_type *cast_type; +@@ -2504,35 +2574,55 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, + + if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) + return false; +- list_add_tail(params->instrs, &arg1_swzl1->node.entry); ++ list_add_tail(params->instrs, &arg1_swzl1->entry); + + if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) + return false; +- list_add_tail(params->instrs, &arg2_swzl1->node.entry); ++ list_add_tail(params->instrs, &arg2_swzl1->entry); + +- if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, +- &arg1_swzl1->node, &arg2_swzl1->node, loc))) ++ if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) + return false; + +- if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, 
*loc))) ++ if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, loc))) + return false; + list_add_tail(params->instrs, &mul1_neg->entry); + + if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) + return false; +- list_add_tail(params->instrs, &arg1_swzl2->node.entry); ++ list_add_tail(params->instrs, &arg1_swzl2->entry); + + if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) + return false; +- list_add_tail(params->instrs, &arg2_swzl2->node.entry); ++ list_add_tail(params->instrs, &arg2_swzl2->entry); + +- if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, +- &arg1_swzl2->node, &arg2_swzl2->node, loc))) ++ if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) + return false; + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, mul2, mul1_neg, loc); + } + ++static bool intrinsic_ddx(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); ++} ++ ++static bool intrinsic_ddy(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); ++} ++ + static bool intrinsic_distance(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -2565,8 +2655,7 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, + static bool intrinsic_exp(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_constant *coeff; +- struct hlsl_ir_node *arg, *mul; ++ struct hlsl_ir_node *arg, *mul, *coeff; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; +@@ -2574,9 +2663,9 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, + /* 1/ln(2) */ + if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) + return false; +- list_add_tail(params->instrs, &coeff->node.entry); ++ list_add_tail(params->instrs, &coeff->entry); + +- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &coeff->node, params->args[0], loc))) ++ if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, mul, loc); +@@ -2604,6 +2693,48 @@ static bool intrinsic_floor(struct hlsl_ctx *ctx, + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FLOOR, arg, loc); + } + ++static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer *params, ++ const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *x, *y, *div, *abs, *frac, *neg_frac, *ge, *select; ++ struct hlsl_ir_constant *zero; ++ unsigned int count, i; ++ ++ if (!(x = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ if (!(y = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) ++ return false; ++ ++ if (!(div = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, x, y, loc))) ++ 
return false; ++ ++ if (!(zero = hlsl_new_constant(ctx, div->data_type, loc))) ++ return false; ++ list_add_tail(params->instrs, &zero->node.entry); ++ ++ count = hlsl_type_element_count(div->data_type); ++ for (i = 0; i < count; ++i) ++ zero->value.u[i].f = 0.0f; ++ ++ if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) ++ return false; ++ ++ if (!(frac = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FRACT, abs, loc))) ++ return false; ++ ++ if (!(neg_frac = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, frac, loc))) ++ return false; ++ ++ if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, div, &zero->node, loc))) ++ return false; ++ ++ if (!(select = hlsl_add_conditional(ctx, params->instrs, ge, frac, neg_frac))) ++ return false; ++ ++ return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, select, y, loc); ++} ++ + static bool intrinsic_frac(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -2635,7 +2766,7 @@ static bool intrinsic_length(struct hlsl_ctx *ctx, + struct hlsl_type *type = params->args[0]->data_type; + struct hlsl_ir_node *arg, *dot; + +- if (type->type == HLSL_CLASS_MATRIX) ++ if (type->class == HLSL_CLASS_MATRIX) + { + struct vkd3d_string_buffer *string; + +@@ -2692,20 +2823,18 @@ static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, + static bool intrinsic_lit(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *n_l_neg, *n_h_neg, *specular_or, *specular_pow; +- struct hlsl_ir_constant *init, *zero; +- struct hlsl_ir_node *n_l, *n_h, *m; +- struct hlsl_ir_node *diffuse; +- struct hlsl_ir_store *store; ++ struct hlsl_ir_node *n_l_neg, *n_h_neg, *specular_or, *specular_pow, *load; ++ struct hlsl_ir_node *n_l, *n_h, *m, *diffuse, *zero, *store; ++ struct hlsl_ir_constant *init; ++ struct hlsl_ir_load *var_load; + struct hlsl_deref var_deref; + struct hlsl_type *ret_type; +- struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + struct hlsl_block block; + +- if (params->args[0]->data_type->type != HLSL_CLASS_SCALAR +- || params->args[1]->data_type->type != HLSL_CLASS_SCALAR +- || params->args[2]->data_type->type != HLSL_CLASS_SCALAR) ++ if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR ++ || params->args[1]->data_type->class != HLSL_CLASS_SCALAR ++ || params->args[2]->data_type->class != HLSL_CLASS_SCALAR) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); + return false; +@@ -2728,35 +2857,33 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, + + if (!(init = hlsl_new_constant(ctx, ret_type, loc))) + return false; +- init->value[0].f = 1.0f; +- init->value[1].f = 0.0f; +- init->value[2].f = 0.0f; +- init->value[3].f = 1.0f; ++ init->value.u[0].f = 1.0f; ++ init->value.u[1].f = 0.0f; ++ init->value.u[2].f = 0.0f; ++ init->value.u[3].f = 1.0f; + list_add_tail(params->instrs, &init->node.entry); + + if (!(store = hlsl_new_simple_store(ctx, var, &init->node))) + return false; +- list_add_tail(params->instrs, &store->node.entry); ++ list_add_tail(params->instrs, &store->entry); + + if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) + return false; +- list_add_tail(params->instrs, &zero->node.entry); ++ list_add_tail(params->instrs, &zero->entry); + + /* Diffuse component. 
*/ +- if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, &zero->node, loc))) ++ if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) + return false; + +- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse))) ++ if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) + return false; + list_move_tail(params->instrs, &block.instrs); + + /* Specular component. */ +- if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, +- n_h, &zero->node, loc))) ++ if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_h, zero, loc))) + return false; + +- if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, +- n_l, &zero->node, loc))) ++ if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_l, zero, loc))) + return false; + + if (!(specular_or = add_binary_logical_expr(ctx, params->instrs, HLSL_OP2_LOGIC_OR, n_l_neg, n_h_neg, loc))) +@@ -2765,20 +2892,67 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, + if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) + return false; + +- if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, &zero->node, specular_pow))) ++ if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, zero, specular_pow))) + return false; + +- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 2, &load->node))) ++ if (!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) + return false; + list_move_tail(params->instrs, &block.instrs); + +- if (!(load = hlsl_new_var_load(ctx, var, *loc))) ++ if (!(var_load = hlsl_new_var_load(ctx, var, loc))) + return false; +- list_add_tail(params->instrs, &load->node.entry); ++ list_add_tail(params->instrs, &var_load->node.entry); + + return true; + } + ++static bool intrinsic_log(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *log, *arg, *coeff; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) ++ return false; ++ ++ /* ln(2) */ ++ if (!(coeff = hlsl_new_float_constant(ctx, 0.69314718055f, loc))) ++ return false; ++ ++ return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); ++} ++ ++static bool intrinsic_log10(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *log, *arg, *coeff; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) ++ return false; ++ ++ /* 1 / log2(10) */ ++ if (!(coeff = hlsl_new_float_constant(ctx, 0.301029996f, loc))) ++ return false; ++ ++ return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); ++} ++ ++static bool intrinsic_log2(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); ++} ++ + static bool intrinsic_max(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const 
struct vkd3d_shader_location *loc) + { +@@ -2808,15 +2982,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + +- if (arg1->data_type->type == HLSL_CLASS_SCALAR || arg2->data_type->type == HLSL_CLASS_SCALAR) ++ if (arg1->data_type->class == HLSL_CLASS_SCALAR || arg2->data_type->class == HLSL_CLASS_SCALAR) + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1, arg2, loc); + +- if (arg1->data_type->type == HLSL_CLASS_VECTOR) ++ if (arg1->data_type->class == HLSL_CLASS_VECTOR) + { + vect_count++; + cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->dimx, 1); + } +- if (arg2->data_type->type == HLSL_CLASS_VECTOR) ++ if (arg2->data_type->class == HLSL_CLASS_VECTOR) + { + vect_count++; + cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->dimx); +@@ -2854,13 +3028,11 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + for (j = 0; j < matrix_type->dimy; ++j) + { + struct hlsl_ir_node *instr = NULL; +- struct hlsl_ir_store *store; + struct hlsl_block block; + + for (k = 0; k < cast_type1->dimx && k < cast_type2->dimy; ++k) + { +- struct hlsl_ir_load *value1, *value2; +- struct hlsl_ir_node *mul; ++ struct hlsl_ir_node *value1, *value2, *mul; + + if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) + return false; +@@ -2868,7 +3040,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) + return false; + +- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &value1->node, &value2->node, loc))) ++ if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) + return false; + + if (instr) +@@ -2882,13 +3054,13 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + } + } + +- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr))) ++ if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) + return false; + list_move_tail(params->instrs, &block.instrs); + } + } + +- if (!(load = hlsl_new_var_load(ctx, var, *loc))) ++ if (!(load = hlsl_new_var_load(ctx, var, loc))) + return false; + list_add_tail(params->instrs, &load->node.entry); + +@@ -2901,7 +3073,7 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, + struct hlsl_type *type = params->args[0]->data_type; + struct hlsl_ir_node *dot, *rsq, *arg; + +- if (type->type == HLSL_CLASS_MATRIX) ++ if (type->class == HLSL_CLASS_MATRIX) + { + struct vkd3d_string_buffer *string; + +@@ -2986,6 +3158,42 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx, + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, arg, loc); + } + ++static bool intrinsic_sign(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *lt, *neg, *op1, *op2, *arg = params->args[0]; ++ struct hlsl_ir_constant *zero; ++ ++ struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_INT, ++ arg->data_type->dimx, arg->data_type->dimy); ++ ++ if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), loc))) ++ return false; ++ list_add_tail(params->instrs, &zero->node.entry); ++ ++ /* Check if 0 < arg, cast bool to int */ ++ ++ if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, &zero->node, arg, loc))) ++ return false; ++ ++ if 
(!(op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) ++ return false; ++ ++ /* Check if arg < 0, cast bool to int and invert (meaning true is -1) */ ++ ++ if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, arg, &zero->node, loc))) ++ return false; ++ ++ if (!(op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) ++ return false; ++ ++ if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, op2, loc))) ++ return false; ++ ++ /* Adding these two together will make 1 when > 0, -1 when < 0, and 0 when neither */ ++ return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, neg, op1, loc); ++} ++ + static bool intrinsic_sin(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3001,8 +3209,7 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, + static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res; +- struct hlsl_ir_constant *one, *minus_two, *three; ++ struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res, *one, *minus_two, *three; + + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; +@@ -3022,9 +3229,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, + + if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) + return false; +- list_add_tail(params->instrs, &one->node.entry); ++ list_add_tail(params->instrs, &one->entry); + +- if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, &one->node, p_denom, loc))) ++ if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) + return false; + + if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p_num, p_denom, loc))) +@@ -3035,16 +3242,16 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, + + if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) + return false; +- list_add_tail(params->instrs, &minus_two->node.entry); ++ list_add_tail(params->instrs, &minus_two->entry); + + if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) + return false; +- list_add_tail(params->instrs, &three->node.entry); ++ list_add_tail(params->instrs, &three->entry); + +- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &minus_two->node, p, loc))) ++ if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) + return false; + +- if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, &three->node, res, loc))) ++ if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, three, res, loc))) + return false; + + if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, p, loc))) +@@ -3081,7 +3288,7 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, + return false; + + type = ge->data_type; +- type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); + } + +@@ -3090,9 +3297,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + { + struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; + const struct hlsl_type *sampler_type; +- struct hlsl_ir_resource_load 
*load; +- struct hlsl_ir_load *sampler_load; +- struct hlsl_ir_node *coords; ++ struct hlsl_ir_node *coords, *load; + + if (params->args_count != 2 && params->args_count != 4) + { +@@ -3107,7 +3312,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + } + + sampler_type = params->args[0]->data_type; +- if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER ++ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) + { + struct vkd3d_string_buffer *string; +@@ -3118,24 +3323,19 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + name, ctx->builtin_types.sampler[dim]->name, string->buffer); + hlsl_release_string_buffer(ctx, string); + } +- else +- { +- /* Only HLSL_IR_LOAD can return an object. */ +- sampler_load = hlsl_ir_load(params->args[0]); +- +- load_params.resource = sampler_load->src; +- } + + if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) + coords = params->args[1]; + + load_params.coords = coords; ++ load_params.resource = params->args[0]; + load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); ++ load_params.sampling_dim = dim; + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; +- list_add_tail(params->instrs, &load->node.entry); ++ list_add_tail(params->instrs, &load->entry); + return true; + } + +@@ -3156,13 +3356,14 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *arg = params->args[0]; + struct hlsl_type *arg_type = arg->data_type; ++ struct hlsl_ir_load *var_load; + struct hlsl_deref var_deref; + struct hlsl_type *mat_type; +- struct hlsl_ir_load *load; ++ struct hlsl_ir_node *load; + struct hlsl_ir_var *var; + unsigned int i, j; + +- if (arg_type->type != HLSL_CLASS_SCALAR && arg_type->type != HLSL_CLASS_MATRIX) ++ if (arg_type->class != HLSL_CLASS_SCALAR && arg_type->class != HLSL_CLASS_MATRIX) + { + struct vkd3d_string_buffer *string; + +@@ -3174,7 +3375,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, + return false; + } + +- if (arg_type->type == HLSL_CLASS_SCALAR) ++ if (arg_type->class == HLSL_CLASS_SCALAR) + { + list_add_tail(params->instrs, &arg->entry); + return true; +@@ -3190,21 +3391,75 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, + { + for (j = 0; j < arg_type->dimy; ++j) + { +- struct hlsl_ir_store *store; + struct hlsl_block block; + + if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) + return false; + +- if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, &load->node))) ++ if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) + return false; + list_move_tail(params->instrs, &block.instrs); + } + } + +- if (!(load = hlsl_new_var_load(ctx, var, *loc))) ++ if (!(var_load = hlsl_new_var_load(ctx, var, loc))) + return false; +- list_add_tail(params->instrs, &load->node.entry); ++ list_add_tail(params->instrs, &var_load->node.entry); ++ ++ return true; ++} ++ ++static bool intrinsic_trunc(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, 
params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, arg, loc); ++} ++ ++static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg = params->args[0], *ret, *c, *swizzle; ++ struct hlsl_type *arg_type = arg->data_type; ++ ++ if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR && arg_type->dimx == 4)) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, arg_type))) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Wrong argument type '%s'.", string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ } ++ ++ return false; ++ } ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) ++ return false; ++ ++ if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) ++ return false; ++ list_add_tail(params->instrs, &c->entry); ++ ++ if (arg_type->class == HLSL_CLASS_VECTOR) ++ { ++ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) ++ return false; ++ list_add_tail(params->instrs, &swizzle->entry); ++ ++ arg = swizzle; ++ } ++ ++ if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) ++ return false; ++ ++ if (ctx->profile->major_version >= 4) ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); + + return true; + } +@@ -3220,22 +3475,31 @@ static const struct intrinsic_function + intrinsic_functions[] = + { + /* Note: these entries should be kept in alphabetical order. */ ++ {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, + {"abs", 1, true, intrinsic_abs}, + {"all", 1, true, intrinsic_all}, ++ {"any", 1, true, intrinsic_any}, ++ {"asfloat", 1, true, intrinsic_asfloat}, + {"asuint", -1, true, intrinsic_asuint}, + {"clamp", 3, true, intrinsic_clamp}, + {"cos", 1, true, intrinsic_cos}, + {"cross", 2, true, intrinsic_cross}, ++ {"ddx", 1, true, intrinsic_ddx}, ++ {"ddy", 1, true, intrinsic_ddy}, + {"distance", 2, true, intrinsic_distance}, + {"dot", 2, true, intrinsic_dot}, + {"exp", 1, true, intrinsic_exp}, + {"exp2", 1, true, intrinsic_exp2}, + {"floor", 1, true, intrinsic_floor}, ++ {"fmod", 2, true, intrinsic_fmod}, + {"frac", 1, true, intrinsic_frac}, + {"ldexp", 2, true, intrinsic_ldexp}, + {"length", 1, true, intrinsic_length}, + {"lerp", 3, true, intrinsic_lerp}, + {"lit", 3, true, intrinsic_lit}, ++ {"log", 1, true, intrinsic_log}, ++ {"log10", 1, true, intrinsic_log10}, ++ {"log2", 1, true, intrinsic_log2}, + {"max", 2, true, intrinsic_max}, + {"min", 2, true, intrinsic_min}, + {"mul", 2, true, intrinsic_mul}, +@@ -3245,6 +3509,7 @@ intrinsic_functions[] = + {"round", 1, true, intrinsic_round}, + {"rsqrt", 1, true, intrinsic_rsqrt}, + {"saturate", 1, true, intrinsic_saturate}, ++ {"sign", 1, true, intrinsic_sign}, + {"sin", 1, true, intrinsic_sin}, + {"smoothstep", 3, true, intrinsic_smoothstep}, + {"sqrt", 1, true, intrinsic_sqrt}, +@@ -3252,6 +3517,7 @@ intrinsic_functions[] = + {"tex2D", -1, false, intrinsic_tex2D}, + {"tex3D", -1, false, intrinsic_tex3D}, + {"transpose", 1, true, intrinsic_transpose}, ++ {"trunc", 1, true, intrinsic_trunc}, + }; + + static int intrinsic_function_name_compare(const void *a, const void *b) +@@ -3291,11 +3557,11 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, + + if (param->storage_modifiers & HLSL_STORAGE_IN) + { +- struct 
hlsl_ir_store *store; ++ struct hlsl_ir_node *store; + + if (!(store = hlsl_new_simple_store(ctx, param, arg))) + goto fail; +- list_add_tail(args->instrs, &store->node.entry); ++ list_add_tail(args->instrs, &store->entry); + } + } + +@@ -3316,7 +3582,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, + hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, + "Output argument to \"%s\" is const.", decl->func->name); + +- if (!(load = hlsl_new_var_load(ctx, param, arg->loc))) ++ if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) + goto fail; + list_add_tail(args->instrs, &load->node.entry); + +@@ -3329,7 +3595,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, + { + struct hlsl_ir_load *load; + +- if (!(load = hlsl_new_var_load(ctx, decl->return_var, *loc))) ++ if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) + goto fail; + list_add_tail(args->instrs, &load->node.entry); + } +@@ -3360,7 +3626,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, + + for (i = 0; i < args->args_count; ++i) + { +- if (args->args[i]->data_type->type > HLSL_CLASS_LAST_NUMERIC) ++ if (args->args[i]->data_type->class > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + +@@ -3397,71 +3663,408 @@ fail: + } + + static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, +- struct parse_initializer *params, struct vkd3d_shader_location loc) ++ struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + unsigned int i, idx = 0; + +- if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, &loc))) +- return NULL; ++ if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) ++ return NULL; ++ ++ for (i = 0; i < params->args_count; ++i) ++ { ++ struct hlsl_ir_node *arg = params->args[i]; ++ ++ if (arg->data_type->class == HLSL_CLASS_OBJECT) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, arg->data_type))) ++ hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Invalid type %s for constructor argument.", string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ continue; ++ } ++ ++ initialize_var_components(ctx, params->instrs, var, &idx, arg); ++ } ++ ++ if (!(load = hlsl_new_var_load(ctx, var, loc))) ++ return NULL; ++ list_add_tail(params->instrs, &load->node.entry); ++ ++ vkd3d_free(params->args); ++ return params->instrs; ++} ++ ++static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) ++{ ++ switch (dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ case HLSL_SAMPLER_DIM_1DARRAY: ++ return 1; ++ case HLSL_SAMPLER_DIM_2D: ++ case HLSL_SAMPLER_DIM_2DMS: ++ case HLSL_SAMPLER_DIM_2DARRAY: ++ case HLSL_SAMPLER_DIM_2DMSARRAY: ++ return 2; ++ case HLSL_SAMPLER_DIM_3D: ++ return 3; ++ case HLSL_SAMPLER_DIM_CUBE: ++ case HLSL_SAMPLER_DIM_CUBEARRAY: ++ /* Offset parameters not supported for these types. 
*/ ++ return 0; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ const struct hlsl_type *object_type = object->data_type; ++ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); ++ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); ++ struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; ++ struct hlsl_ir_node *load; ++ bool multisampled; ++ ++ multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS ++ || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; ++ ++ if (params->args_count < 1 + multisampled || params->args_count > 3 + multisampled) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to method 'Load': expected between %u and %u, but got %u.", ++ 1 + multisampled, 3 + multisampled, params->args_count); ++ return false; ++ } ++ if (multisampled) ++ { ++ if (!(load_params.sample_index = add_implicit_conversion(ctx, instrs, params->args[1], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) ++ return false; ++ } ++ ++ assert(offset_dim); ++ if (params->args_count > 1 + multisampled) ++ { ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) ++ return false; ++ } ++ if (params->args_count > 2 + multisampled) ++ { ++ hlsl_fixme(ctx, loc, "Tiled resource status argument."); ++ } ++ ++ /* +1 for the mipmap level for non-multisampled textures */ ++ if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc))) ++ return false; ++ ++ load_params.format = object_type->e.resource_format; ++ load_params.resource = object; ++ ++ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) ++ return false; ++ list_add_tail(instrs, &load->entry); ++ return true; ++} ++ ++static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ const struct hlsl_type *object_type = object->data_type; ++ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); ++ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); ++ struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; ++ const struct hlsl_type *sampler_type; ++ struct hlsl_ir_node *load; ++ ++ if (params->args_count < 2 || params->args_count > 4 + !!offset_dim) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to method 'Sample': expected from 2 to %u, but got %u.", ++ 4 + !!offset_dim, params->args_count); ++ return false; ++ } ++ ++ sampler_type = params->args[0]->data_type; ++ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER ++ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, sampler_type))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); ++ 
hlsl_release_string_buffer(ctx, string); ++ return false; ++ } ++ ++ if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ return false; ++ ++ if (offset_dim && params->args_count > 2) ++ { ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) ++ return false; ++ } ++ ++ if (params->args_count > 2 + !!offset_dim) ++ hlsl_fixme(ctx, loc, "Sample() clamp parameter."); ++ if (params->args_count > 3 + !!offset_dim) ++ hlsl_fixme(ctx, loc, "Tiled resource status argument."); ++ ++ load_params.format = object_type->e.resource_format; ++ load_params.resource = object; ++ load_params.sampler = params->args[0]; ++ ++ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) ++ return false; ++ list_add_tail(instrs, &load->entry); ++ ++ return true; ++} ++ ++static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ const struct hlsl_type *object_type = object->data_type; ++ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); ++ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); ++ struct hlsl_resource_load_params load_params = {0}; ++ const struct hlsl_type *sampler_type; ++ struct hlsl_ir_node *load; ++ unsigned int read_channel; ++ ++ if (!strcmp(name, "GatherGreen")) ++ { ++ load_params.type = HLSL_RESOURCE_GATHER_GREEN; ++ read_channel = 1; ++ } ++ else if (!strcmp(name, "GatherBlue")) ++ { ++ load_params.type = HLSL_RESOURCE_GATHER_BLUE; ++ read_channel = 2; ++ } ++ else if (!strcmp(name, "GatherAlpha")) ++ { ++ load_params.type = HLSL_RESOURCE_GATHER_ALPHA; ++ read_channel = 3; ++ } ++ else ++ { ++ load_params.type = HLSL_RESOURCE_GATHER_RED; ++ read_channel = 0; ++ } ++ ++ if (!strcmp(name, "Gather") || !offset_dim) ++ { ++ if (params->args_count < 2 || params->args_count > 3 + !!offset_dim) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to method '%s': expected from 2 to %u, but got %u.", ++ name, 3 + !!offset_dim, params->args_count); ++ return false; ++ } ++ } ++ else if (params->args_count < 2 || params->args_count == 5 || params->args_count > 7) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to method '%s': expected 2, 3, 4, 6 or 7, but got %u.", ++ name, params->args_count); ++ return false; ++ } ++ ++ if (params->args_count == 3 + !!offset_dim || params->args_count == 7) ++ hlsl_fixme(ctx, loc, "Tiled resource status argument."); ++ ++ if (params->args_count == 6 || params->args_count == 7) ++ { ++ hlsl_fixme(ctx, loc, "Multiple %s() offset parameters.", name); ++ } ++ else if (offset_dim && params->args_count > 2) ++ { ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) ++ return false; ++ } ++ ++ sampler_type = params->args[0]->data_type; ++ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER ++ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, sampler_type))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, 
++ "Wrong type for argument 1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ return false; ++ } ++ ++ if (read_channel >= object_type->e.resource_format->dimx) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Method %s() requires at least %u channels.", name, read_channel + 1); ++ return false; ++ } ++ ++ if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ return false; ++ ++ load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); ++ load_params.resource = object; ++ load_params.sampler = params->args[0]; ++ ++ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) ++ return false; ++ list_add_tail(instrs, &load->entry); ++ return true; ++} ++ ++static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, ++ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ const struct hlsl_type *object_type = object->data_type; ++ struct hlsl_resource_load_params load_params = { 0 }; ++ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); ++ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); ++ const struct hlsl_type *sampler_type; ++ struct hlsl_ir_node *load; ++ ++ if (!strcmp(name, "SampleLevel")) ++ load_params.type = HLSL_RESOURCE_SAMPLE_LOD; ++ else ++ load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; ++ ++ if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", ++ name, 4 + !!offset_dim, params->args_count); ++ return false; ++ } ++ ++ sampler_type = params->args[0]->data_type; ++ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER ++ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, sampler_type))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Wrong type for argument 0 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ return false; ++ } ++ ++ if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ load_params.coords = params->args[1]; ++ ++ if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) ++ load_params.lod = params->args[2]; ++ ++ if (offset_dim && params->args_count > 3) ++ { ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) ++ return false; ++ } ++ ++ if (params->args_count > 3 + !!offset_dim) ++ hlsl_fixme(ctx, loc, "Tiled resource status argument."); ++ ++ load_params.format = object_type->e.resource_format; ++ load_params.resource = object; ++ load_params.sampler = params->args[0]; ++ ++ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) ++ return false; ++ list_add_tail(instrs, &load->entry); ++ return true; ++} ++ ++static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node 
*object, ++ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ const struct hlsl_type *object_type = object->data_type; ++ struct hlsl_resource_load_params load_params = { 0 }; ++ const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); ++ const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); ++ const struct hlsl_type *sampler_type; ++ struct hlsl_ir_node *load; ++ ++ load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; + +- for (i = 0; i < params->args_count; ++i) ++ if (params->args_count < 4 || params->args_count > 5 + !!offset_dim) + { +- struct hlsl_ir_node *arg = params->args[i]; +- +- if (arg->data_type->type == HLSL_CLASS_OBJECT) +- { +- struct vkd3d_string_buffer *string; ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to method '%s': expected from 4 to %u, but got %u.", ++ name, 5 + !!offset_dim, params->args_count); ++ return false; ++ } + +- if ((string = hlsl_type_to_string(ctx, arg->data_type))) +- hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Invalid type %s for constructor argument.", string->buffer); +- hlsl_release_string_buffer(ctx, string); +- continue; +- } ++ sampler_type = params->args[0]->data_type; ++ if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER ++ || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) ++ { ++ struct vkd3d_string_buffer *string; + +- initialize_var_components(ctx, params->instrs, var, &idx, arg); ++ if ((string = hlsl_type_to_string(ctx, sampler_type))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Wrong type for argument 0 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ return false; + } + +- if (!(load = hlsl_new_var_load(ctx, var, loc))) +- return NULL; +- list_add_tail(params->instrs, &load->node.entry); ++ if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ load_params.coords = params->args[1]; + +- vkd3d_free(params->args); +- return params->instrs; +-} ++ if (!(load_params.ddx = add_implicit_conversion(ctx, instrs, params->args[2], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ load_params.ddx = params->args[2]; + +-static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) +-{ +- switch (dim) ++ if (!(load_params.ddy = add_implicit_conversion(ctx, instrs, params->args[3], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ load_params.ddy = params->args[3]; ++ ++ if (offset_dim && params->args_count > 4) + { +- case HLSL_SAMPLER_DIM_1D: +- case HLSL_SAMPLER_DIM_1DARRAY: +- return 1; +- case HLSL_SAMPLER_DIM_2D: +- case HLSL_SAMPLER_DIM_2DMS: +- case HLSL_SAMPLER_DIM_2DARRAY: +- case HLSL_SAMPLER_DIM_2DMSARRAY: +- return 2; +- case HLSL_SAMPLER_DIM_3D: +- return 3; +- case HLSL_SAMPLER_DIM_CUBE: +- case HLSL_SAMPLER_DIM_CUBEARRAY: +- /* Offset parameters not supported for these types. 
*/ +- return 0; +- default: +- vkd3d_unreachable(); ++ if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[4], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) ++ return false; + } ++ ++ if (params->args_count > 4 + !!offset_dim) ++ hlsl_fixme(ctx, loc, "Tiled resource status argument."); ++ ++ load_params.format = object_type->e.resource_format; ++ load_params.resource = object; ++ load_params.sampler = params->args[0]; ++ ++ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) ++ return false; ++ list_add_tail(instrs, &load->entry); ++ return true; + } + + static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + const struct hlsl_type *object_type = object->data_type; +- struct hlsl_ir_load *object_load; + +- if (object_type->type != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE ++ if (object_type->class != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE + || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; +@@ -3473,119 +4076,17 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl + return false; + } + +- /* Only HLSL_IR_LOAD can return an object. */ +- object_load = hlsl_ir_load(object); +- + if (!strcmp(name, "Load") + && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBE + && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBEARRAY) + { +- const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); +- const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); +- struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; +- struct hlsl_ir_resource_load *load; +- bool multisampled; +- +- multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS +- || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; +- +- if (params->args_count < 1 + multisampled || params->args_count > 3 + multisampled) +- { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Wrong number of arguments to method 'Load': expected between %u and %u, but got %u.", +- 1 + multisampled, 3 + multisampled, params->args_count); +- return false; +- } +- if (multisampled) +- { +- hlsl_fixme(ctx, loc, "Load() sampling index parameter."); +- } +- +- assert(offset_dim); +- if (params->args_count > 1 + multisampled) +- { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], +- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) +- return false; +- } +- if (params->args_count > 2 + multisampled) +- { +- hlsl_fixme(ctx, loc, "Tiled resource status argument."); +- } +- +- /* +1 for the mipmap level */ +- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], +- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + 1), loc))) +- return false; +- +- load_params.format = object_type->e.resource_format; +- load_params.resource = object_load->src; +- +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- list_add_tail(instrs, &load->node.entry); +- return true; ++ return add_load_method_call(ctx, instrs, object, name, params, loc); + } + else if (!strcmp(name, "Sample") + && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS + && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) + { +- const unsigned int 
sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); +- const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); +- struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; +- const struct hlsl_type *sampler_type; +- struct hlsl_ir_resource_load *load; +- struct hlsl_ir_load *sampler_load; +- +- if (params->args_count < 2 || params->args_count > 4 + !!offset_dim) +- { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Wrong number of arguments to method 'Sample': expected from 2 to %u, but got %u.", +- 4 + !!offset_dim, params->args_count); +- return false; +- } +- +- sampler_type = params->args[0]->data_type; +- if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER +- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) +- { +- struct vkd3d_string_buffer *string; +- +- if ((string = hlsl_type_to_string(ctx, sampler_type))) +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); +- hlsl_release_string_buffer(ctx, string); +- return false; +- } +- +- /* Only HLSL_IR_LOAD can return an object. */ +- sampler_load = hlsl_ir_load(params->args[0]); +- +- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- return false; +- +- if (offset_dim && params->args_count > 2) +- { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], +- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) +- return false; +- } +- +- if (params->args_count > 2 + !!offset_dim) +- hlsl_fixme(ctx, loc, "Sample() clamp parameter."); +- if (params->args_count > 3 + !!offset_dim) +- hlsl_fixme(ctx, loc, "Tiled resource status argument."); +- +- load_params.format = object_type->e.resource_format; +- load_params.resource = object_load->src; +- load_params.sampler = sampler_load->src; +- +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- list_add_tail(instrs, &load->node.entry); +- +- return true; ++ return add_sample_method_call(ctx, instrs, object, name, params, loc); + } + else if ((!strcmp(name, "Gather") || !strcmp(name, "GatherRed") || !strcmp(name, "GatherBlue") + || !strcmp(name, "GatherGreen") || !strcmp(name, "GatherAlpha")) +@@ -3594,164 +4095,25 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl + || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBE + || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBEARRAY)) + { +- const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); +- const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); +- struct hlsl_resource_load_params load_params = {0}; +- const struct hlsl_type *sampler_type; +- struct hlsl_ir_resource_load *load; +- struct hlsl_ir_load *sampler_load; +- unsigned int read_channel; +- +- if (!strcmp(name, "GatherGreen")) +- { +- load_params.type = HLSL_RESOURCE_GATHER_GREEN; +- read_channel = 1; +- } +- else if (!strcmp(name, "GatherBlue")) +- { +- load_params.type = HLSL_RESOURCE_GATHER_BLUE; +- read_channel = 2; +- } +- else if (!strcmp(name, "GatherAlpha")) +- { +- load_params.type = HLSL_RESOURCE_GATHER_ALPHA; +- read_channel = 3; +- } +- else +- { +- load_params.type = HLSL_RESOURCE_GATHER_RED; +- read_channel = 0; +- } +- +- if (!strcmp(name, "Gather") || !offset_dim) +- { +- if 
(params->args_count < 2 || params->args_count > 3 + !!offset_dim) +- { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Wrong number of arguments to method '%s': expected from 2 to %u, but got %u.", +- name, 3 + !!offset_dim, params->args_count); +- return false; +- } +- } +- else if (params->args_count < 2 || params->args_count == 5 || params->args_count > 7) +- { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Wrong number of arguments to method '%s': expected 2, 3, 4, 6 or 7, but got %u.", +- name, params->args_count); +- return false; +- } +- +- if (params->args_count == 3 + !!offset_dim || params->args_count == 7) +- hlsl_fixme(ctx, loc, "Tiled resource status argument."); +- +- if (params->args_count == 6 || params->args_count == 7) +- { +- hlsl_fixme(ctx, loc, "Multiple %s() offset parameters.", name); +- } +- else if (offset_dim && params->args_count > 2) +- { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], +- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) +- return false; +- } +- +- sampler_type = params->args[0]->data_type; +- if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER +- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) +- { +- struct vkd3d_string_buffer *string; +- +- if ((string = hlsl_type_to_string(ctx, sampler_type))) +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Wrong type for argument 1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); +- hlsl_release_string_buffer(ctx, string); +- return false; +- } +- +- if (read_channel >= object_type->e.resource_format->dimx) +- { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Method %s() requires at least %u channels.", name, read_channel + 1); +- return false; +- } +- +- /* Only HLSL_IR_LOAD can return an object. 
*/ +- sampler_load = hlsl_ir_load(params->args[0]); +- +- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- return false; +- +- load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); +- load_params.resource = object_load->src; +- load_params.sampler = sampler_load->src; +- +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- list_add_tail(instrs, &load->node.entry); +- return true; ++ return add_gather_method_call(ctx, instrs, object, name, params, loc); + } + else if (!strcmp(name, "SampleLevel") + && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS + && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) + { +- struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE_LOD}; +- const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); +- const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); +- const struct hlsl_type *sampler_type; +- struct hlsl_ir_resource_load *load; +- struct hlsl_ir_load *sampler_load; +- +- if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) +- { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Wrong number of arguments to method 'SampleLevel': expected from 3 to %u, but got %u.", +- 4 + !!offset_dim, params->args_count); +- return false; +- } +- +- sampler_type = params->args[0]->data_type; +- if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER +- || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) +- { +- struct vkd3d_string_buffer *string; +- +- if ((string = hlsl_type_to_string(ctx, sampler_type))) +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Wrong type for argument 0 of SampleLevel(): expected 'sampler', but got '%s'.", string->buffer); +- hlsl_release_string_buffer(ctx, string); +- return false; +- } +- +- /* Only HLSL_IR_LOAD can return an object. 
*/ +- sampler_load = hlsl_ir_load(params->args[0]); +- +- if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) +- load_params.coords = params->args[1]; +- +- if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], +- hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) +- load_params.lod = params->args[2]; +- +- if (offset_dim && params->args_count > 3) +- { +- if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], +- hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) +- return false; +- } +- +- if (params->args_count > 3 + !!offset_dim) +- hlsl_fixme(ctx, loc, "Tiled resource status argument."); +- +- load_params.format = object_type->e.resource_format; +- load_params.resource = object_load->src; +- load_params.sampler = sampler_load->src; +- +- if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) +- return false; +- list_add_tail(instrs, &load->node.entry); +- return true; ++ return add_sample_lod_method_call(ctx, instrs, object, name, params, loc); ++ } ++ else if (!strcmp(name, "SampleBias") ++ && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS ++ && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) ++ { ++ return add_sample_lod_method_call(ctx, instrs, object, name, params, loc); ++ } ++ else if (!strcmp(name, "SampleGrad") ++ && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS ++ && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) ++ { ++ return add_sample_grad_method_call(ctx, instrs, object, name, params, loc); + } + else + { +@@ -3768,7 +4130,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl + static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type *format, + const struct vkd3d_shader_location *loc) + { +- if (format->type > HLSL_CLASS_VECTOR) ++ if (format->class > HLSL_CLASS_VECTOR) + { + struct vkd3d_string_buffer *string; + +@@ -3846,6 +4208,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type + %token KW_NAMESPACE + %token KW_NOINTERPOLATION + %token KW_OUT ++%token KW_PACKOFFSET + %token KW_PASS + %token KW_PIXELSHADER + %token KW_PRECISE +@@ -3854,6 +4217,8 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type + %token KW_RETURN + %token KW_REGISTER + %token KW_ROW_MAJOR ++%token KW_RWBUFFER ++%token KW_RWSTRUCTUREDBUFFER + %token KW_RWTEXTURE1D + %token KW_RWTEXTURE2D + %token KW_RWTEXTURE3D +@@ -3933,6 +4298,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type + %type conditional_expr + %type declaration + %type declaration_statement ++%type discard_statement + %type equality_expr + %type expr + %type expr_optional +@@ -3968,6 +4334,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type + %type attribute + + %type attribute_list ++%type attribute_list_optional + + %type boolean + +@@ -3999,6 +4366,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type + %type parameters + + %type register_opt ++%type packoffset_opt + + %type texture_type texture_ms_type uav_type + +@@ -4037,7 +4405,7 @@ buffer_declaration: + if ($3.semantic.name) + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); + +- if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, @2))) ++ if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, 
&$3.reg_reservation, &@2))) + YYABORT; + } + +@@ -4261,6 +4629,14 @@ attribute_list: + $$.attrs[$$.count++] = $2; + } + ++attribute_list_optional: ++ %empty ++ { ++ $$.count = 0; ++ $$.attrs = NULL; ++ } ++ | attribute_list ++ + func_declaration: + func_prototype compound_statement + { +@@ -4349,8 +4725,11 @@ func_prototype_no_attrs: + "Semantics are not allowed on void functions."); + } + +- if ($7.reg_reservation.type) ++ if ($7.reg_reservation.reg_type) + FIXME("Unexpected register reservation for a function.\n"); ++ if ($7.reg_reservation.offset_type) ++ hlsl_error(ctx, &@5, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "packoffset() is not allowed on functions."); + + if (($$.decl = get_func_decl(&ctx->functions, $3, &$5))) + { +@@ -4476,17 +4855,24 @@ var_identifier: + colon_attribute: + %empty + { +- $$.semantic.name = NULL; +- $$.reg_reservation.type = 0; ++ $$.semantic = (struct hlsl_semantic){0}; ++ $$.reg_reservation.reg_type = 0; ++ $$.reg_reservation.offset_type = 0; + } + | semantic + { + $$.semantic = $1; +- $$.reg_reservation.type = 0; ++ $$.reg_reservation.reg_type = 0; ++ $$.reg_reservation.offset_type = 0; + } + | register_opt + { +- $$.semantic.name = NULL; ++ $$.semantic = (struct hlsl_semantic){0}; ++ $$.reg_reservation = $1; ++ } ++ | packoffset_opt ++ { ++ $$.semantic = (struct hlsl_semantic){0}; + $$.reg_reservation = $1; + } + +@@ -4499,6 +4885,9 @@ semantic: + ; + $$.name = $2; + $$.index = atoi(p); ++ $$.reported_missing = false; ++ $$.reported_duplicated_output_next_index = 0; ++ $$.reported_duplicated_input_incompatible_next_index = 0; + *p = 0; + } + +@@ -4518,6 +4907,21 @@ register_opt: + vkd3d_free($6); + } + ++packoffset_opt: ++ ':' KW_PACKOFFSET '(' any_identifier ')' ++ { ++ $$ = parse_packoffset(ctx, $4, NULL, &@$); ++ ++ vkd3d_free($4); ++ } ++ | ':' KW_PACKOFFSET '(' any_identifier '.' 
any_identifier ')' ++ { ++ $$ = parse_packoffset(ctx, $4, $6, &@$); ++ ++ vkd3d_free($4); ++ vkd3d_free($6); ++ } ++ + parameters: + scope_start + { +@@ -4536,7 +4940,7 @@ param_list: + parameter + { + memset(&$$, 0, sizeof($$)); +- if (!add_func_parameter(ctx, &$$, &$1, @1)) ++ if (!add_func_parameter(ctx, &$$, &$1, &@1)) + { + ERR("Error adding function parameter %s.\n", $1.name); + YYABORT; +@@ -4545,7 +4949,7 @@ param_list: + | param_list ',' parameter + { + $$ = $1; +- if (!add_func_parameter(ctx, &$$, &$3, @3)) ++ if (!add_func_parameter(ctx, &$$, &$3, &@3)) + { + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "Parameter \"%s\" is already declared.", $3.name); +@@ -4624,7 +5028,15 @@ texture_ms_type: + } + + uav_type: +- KW_RWTEXTURE1D ++ KW_RWBUFFER ++ { ++ $$ = HLSL_SAMPLER_DIM_BUFFER; ++ } ++ | KW_RWSTRUCTUREDBUFFER ++ { ++ $$ = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER; ++ } ++ | KW_RWTEXTURE1D + { + $$ = HLSL_SAMPLER_DIM_1D; + } +@@ -4640,7 +5052,7 @@ uav_type: + type_no_void: + KW_VECTOR '<' type ',' C_INTEGER '>' + { +- if ($3->type != HLSL_CLASS_SCALAR) ++ if ($3->class != HLSL_CLASS_SCALAR) + { + struct vkd3d_string_buffer *string; + +@@ -4667,7 +5079,7 @@ type_no_void: + } + | KW_MATRIX '<' type ',' C_INTEGER ',' C_INTEGER '>' + { +- if ($3->type != HLSL_CLASS_SCALAR) ++ if ($3->class != HLSL_CLASS_SCALAR) + { + struct vkd3d_string_buffer *string; + +@@ -4740,23 +5152,58 @@ type_no_void: + } + | texture_ms_type '<' type ',' shift_expr '>' + { +- unsigned int sample_count = evaluate_static_expression(node_from_list($5)); +- destroy_instr_list($5); ++ unsigned int sample_count; ++ struct hlsl_block block; ++ ++ hlsl_block_init(&block); ++ list_move_tail(&block.instrs, $5); ++ ++ sample_count = evaluate_static_expression_as_uint(ctx, &block, &@5); ++ ++ hlsl_block_cleanup(&block); ++ ++ vkd3d_free($5); + + $$ = hlsl_new_texture_type(ctx, $1, $3, sample_count); + } + | uav_type '<' type '>' + { +- if ($3->type > HLSL_CLASS_VECTOR) +- { +- struct vkd3d_string_buffer *string; ++ struct vkd3d_string_buffer *string = hlsl_type_to_string(ctx, $3); + +- string = hlsl_type_to_string(ctx, $3); ++ if (!type_contains_only_numerics($3)) ++ { + if (string) + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "UAV data type %s is not scalar or vector.", string->buffer); +- hlsl_release_string_buffer(ctx, string); ++ "UAV type %s is not numeric.", string->buffer); ++ } ++ ++ switch ($1) ++ { ++ case HLSL_SAMPLER_DIM_BUFFER: ++ case HLSL_SAMPLER_DIM_1D: ++ case HLSL_SAMPLER_DIM_2D: ++ case HLSL_SAMPLER_DIM_3D: ++ if ($3->class == HLSL_CLASS_ARRAY) ++ { ++ if (string) ++ hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "This type of UAV does not support array type."); ++ } ++ else if (hlsl_type_component_count($3) > 4) ++ { ++ if (string) ++ hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "UAV data type %s size exceeds maximum size.", string->buffer); ++ } ++ break; ++ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: ++ break; ++ default: ++ vkd3d_unreachable(); + } ++ ++ hlsl_release_string_buffer(ctx, string); ++ + $$ = hlsl_new_uav_type(ctx, $1, $3); + } + | TYPE_IDENTIFIER +@@ -4779,7 +5226,7 @@ type_no_void: + | KW_STRUCT TYPE_IDENTIFIER + { + $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); +- if ($$->type != HLSL_CLASS_STRUCT) ++ if ($$->class != HLSL_CLASS_STRUCT) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "\"%s\" redefined as a structure.", $2); + vkd3d_free($2); + } +@@ -4934,10 +5381,17 @@ arrays: + } + | '[' expr ']' arrays + { +- 
unsigned int size = evaluate_static_expression(node_from_list($2)); ++ struct hlsl_block block; + uint32_t *new_array; ++ unsigned int size; + +- destroy_instr_list($2); ++ hlsl_block_init(&block); ++ list_move_tail(&block.instrs, $2); ++ ++ size = evaluate_static_expression_as_uint(ctx, &block, &@2); ++ ++ hlsl_block_cleanup(&block); ++ vkd3d_free($2); + + $$ = $4; + +@@ -4988,59 +5442,59 @@ var_modifiers: + } + | KW_EXTERN var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_EXTERN, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_EXTERN, &@1); + } + | KW_NOINTERPOLATION var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOINTERPOLATION, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOINTERPOLATION, &@1); + } + | KW_PRECISE var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); + } + | KW_SHARED var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, &@1); + } + | KW_GROUPSHARED var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_GROUPSHARED, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_GROUPSHARED, &@1); + } + | KW_STATIC var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_STATIC, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_STATIC, &@1); + } + | KW_UNIFORM var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_UNIFORM, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_UNIFORM, &@1); + } + | KW_VOLATILE var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_VOLATILE, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_VOLATILE, &@1); + } + | KW_CONST var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_CONST, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_CONST, &@1); + } + | KW_ROW_MAJOR var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_ROW_MAJOR, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_ROW_MAJOR, &@1); + } + | KW_COLUMN_MAJOR var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_COLUMN_MAJOR, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_COLUMN_MAJOR, &@1); + } + | KW_IN var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN, &@1); + } + | KW_OUT var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_OUT, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_OUT, &@1); + } + | KW_INOUT var_modifiers + { +- $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, @1); ++ $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, &@1); + } + + +@@ -5145,6 +5599,7 @@ statement: + declaration_statement + | expr_statement + | compound_statement ++ | discard_statement + | jump_statement + | selection_statement + | loop_statement +@@ -5152,7 +5607,7 @@ statement: + jump_statement: + KW_RETURN expr ';' + { +- if (!add_return(ctx, $2, node_from_list($2), @1)) ++ if (!add_return(ctx, $2, node_from_list($2), &@1)) + YYABORT; + $$ = $2; + } +@@ -5160,65 +5615,81 @@ jump_statement: + { + if (!($$ = make_empty_list(ctx))) + YYABORT; +- if (!add_return(ctx, $$, NULL, @1)) ++ if (!add_return(ctx, $$, NULL, &@1)) ++ YYABORT; ++ } ++ ++discard_statement: ++ KW_DISCARD ';' ++ { ++ struct hlsl_ir_node *discard; ++ ++ if (!($$ = make_empty_list(ctx))) + YYABORT; ++ if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) ++ return false; ++ list_add_tail($$, &discard->entry); + } + + selection_statement: + KW_IF '(' expr ')' if_body + { + struct hlsl_ir_node 
*condition = node_from_list($3); +- struct hlsl_ir_if *instr; +- +- if (!(instr = hlsl_new_if(ctx, condition, @1))) ++ struct hlsl_block then_block, else_block; ++ struct hlsl_ir_node *instr; ++ ++ hlsl_block_init(&then_block); ++ list_move_tail(&then_block.instrs, $5.then_block); ++ hlsl_block_init(&else_block); ++ if ($5.else_block) ++ list_move_tail(&else_block.instrs, $5.else_block); ++ vkd3d_free($5.then_block); ++ vkd3d_free($5.else_block); ++ ++ if (!(instr = hlsl_new_if(ctx, condition, &then_block, &else_block, &@1))) + YYABORT; +- list_move_tail(&instr->then_instrs.instrs, $5.then_instrs); +- if ($5.else_instrs) +- list_move_tail(&instr->else_instrs.instrs, $5.else_instrs); +- vkd3d_free($5.then_instrs); +- vkd3d_free($5.else_instrs); + if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, condition->data_type))) +- hlsl_error(ctx, &instr->node.loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "if condition type %s is not scalar.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + $$ = $3; +- list_add_tail($$, &instr->node.entry); ++ list_add_tail($$, &instr->entry); + } + + if_body: + statement + { +- $$.then_instrs = $1; +- $$.else_instrs = NULL; ++ $$.then_block = $1; ++ $$.else_block = NULL; + } + | statement KW_ELSE statement + { +- $$.then_instrs = $1; +- $$.else_instrs = $3; ++ $$.then_block = $1; ++ $$.else_block = $3; + } + + loop_statement: +- KW_WHILE '(' expr ')' statement ++ attribute_list_optional KW_WHILE '(' expr ')' statement + { +- $$ = create_loop(ctx, LOOP_WHILE, NULL, $3, NULL, $5, @1); ++ $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $4, NULL, $6, &@2); + } +- | KW_DO statement KW_WHILE '(' expr ')' ';' ++ | attribute_list_optional KW_DO statement KW_WHILE '(' expr ')' ';' + { +- $$ = create_loop(ctx, LOOP_DO_WHILE, NULL, $5, NULL, $2, @1); ++ $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $6, NULL, $3, &@2); + } +- | KW_FOR '(' scope_start expr_statement expr_statement expr_optional ')' statement ++ | attribute_list_optional KW_FOR '(' scope_start expr_statement expr_statement expr_optional ')' statement + { +- $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); ++ $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); + hlsl_pop_scope(ctx); + } +- | KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement ++ | attribute_list_optional KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement + { +- $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); ++ $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); + hlsl_pop_scope(ctx); + } + +@@ -5250,31 +5721,31 @@ func_arguments: + primary_expr: + C_FLOAT + { +- struct hlsl_ir_constant *c; ++ struct hlsl_ir_node *c; + + if (!(c = hlsl_new_float_constant(ctx, $1, &@1))) + YYABORT; +- if (!($$ = make_list(ctx, &c->node))) ++ if (!($$ = make_list(ctx, c))) + YYABORT; + } + | C_INTEGER + { +- struct hlsl_ir_constant *c; ++ struct hlsl_ir_node *c; + + if (!(c = hlsl_new_int_constant(ctx, $1, &@1))) + YYABORT; +- if (!($$ = make_list(ctx, &c->node))) ++ if (!($$ = make_list(ctx, c))) + YYABORT; + } + | boolean + { +- struct hlsl_ir_constant *c; ++ struct hlsl_ir_node *c; + + if (!(c = hlsl_new_bool_constant(ctx, $1, &@1))) + YYABORT; +- if (!($$ = make_list(ctx, &c->node))) ++ if (!($$ = make_list(ctx, c))) + { +- hlsl_free_instr(&c->node); ++ hlsl_free_instr(c); + YYABORT; + } + } +@@ 
-5288,7 +5759,7 @@ primary_expr: + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Variable \"%s\" is not defined.", $1); + YYABORT; + } +- if (!(load = hlsl_new_var_load(ctx, var, @1))) ++ if (!(load = hlsl_new_var_load(ctx, var, &@1))) + YYABORT; + if (!($$ = make_list(ctx, &load->node))) + YYABORT; +@@ -5316,7 +5787,7 @@ primary_expr: + if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr", + hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) + YYABORT; +- if (!(load = hlsl_new_var_load(ctx, var, @1))) ++ if (!(load = hlsl_new_var_load(ctx, var, &@1))) + YYABORT; + if (!($$ = make_list(ctx, &load->node))) + YYABORT; +@@ -5332,7 +5803,7 @@ postfix_expr: + primary_expr + | postfix_expr OP_INC + { +- if (!add_increment(ctx, $1, false, true, @2)) ++ if (!add_increment(ctx, $1, false, true, &@2)) + { + destroy_instr_list($1); + YYABORT; +@@ -5341,7 +5812,7 @@ postfix_expr: + } + | postfix_expr OP_DEC + { +- if (!add_increment(ctx, $1, true, true, @2)) ++ if (!add_increment(ctx, $1, true, true, &@2)) + { + destroy_instr_list($1); + YYABORT; +@@ -5352,7 +5823,7 @@ postfix_expr: + { + struct hlsl_ir_node *node = node_from_list($1); + +- if (node->data_type->type == HLSL_CLASS_STRUCT) ++ if (node->data_type->class == HLSL_CLASS_STRUCT) + { + struct hlsl_type *type = node->data_type; + const struct hlsl_struct_field *field; +@@ -5365,20 +5836,20 @@ postfix_expr: + } + + field_idx = field - type->e.record.fields; +- if (!add_record_load(ctx, $1, node, field_idx, @2)) ++ if (!add_record_access(ctx, $1, node, field_idx, &@2)) + YYABORT; + $$ = $1; + } +- else if (node->data_type->type <= HLSL_CLASS_LAST_NUMERIC) ++ else if (node->data_type->class <= HLSL_CLASS_LAST_NUMERIC) + { +- struct hlsl_ir_swizzle *swizzle; ++ struct hlsl_ir_node *swizzle; + + if (!(swizzle = get_swizzle(ctx, node, $3, &@3))) + { + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle \"%s\".", $3); + YYABORT; + } +- list_add_tail($1, &swizzle->node.entry); ++ list_add_tail($1, &swizzle->entry); + $$ = $1; + } + else +@@ -5391,10 +5862,10 @@ postfix_expr: + { + struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); + +- list_move_tail($1, $3); ++ list_move_head($1, $3); + vkd3d_free($3); + +- if (!add_array_load(ctx, $1, array, index, &@2)) ++ if (!add_array_access(ctx, $1, array, index, &@2)) + { + destroy_instr_list($1); + YYABORT; +@@ -5412,7 +5883,7 @@ postfix_expr: + free_parse_initializer(&$4); + YYABORT; + } +- if ($2->type > HLSL_CLASS_LAST_NUMERIC) ++ if ($2->class > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + +@@ -5432,7 +5903,7 @@ postfix_expr: + YYABORT; + } + +- if (!($$ = add_constructor(ctx, $2, &$4, @2))) ++ if (!($$ = add_constructor(ctx, $2, &$4, &@2))) + { + free_parse_initializer(&$4); + YYABORT; +@@ -5459,7 +5930,7 @@ unary_expr: + postfix_expr + | OP_INC unary_expr + { +- if (!add_increment(ctx, $2, false, false, @1)) ++ if (!add_increment(ctx, $2, false, false, &@1)) + { + destroy_instr_list($2); + YYABORT; +@@ -5468,7 +5939,7 @@ unary_expr: + } + | OP_DEC unary_expr + { +- if (!add_increment(ctx, $2, true, false, @1)) ++ if (!add_increment(ctx, $2, true, false, &@1)) + { + destroy_instr_list($2); + YYABORT; +@@ -5545,31 +6016,31 @@ mul_expr: + unary_expr + | mul_expr '*' unary_expr + { +- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, @2); ++ $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); + } + | mul_expr '/' unary_expr + { +- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, 
HLSL_OP2_DIV, @2); ++ $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); + } + | mul_expr '%' unary_expr + { +- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, @2); ++ $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); + } + + add_expr: + mul_expr + | add_expr '+' mul_expr + { +- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); ++ $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); + } + | add_expr '-' mul_expr + { + struct hlsl_ir_node *neg; + +- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), @2))) ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), &@2))) + YYABORT; + list_add_tail($3, &neg->entry); +- $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); ++ $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); + } + + shift_expr: +@@ -5587,30 +6058,30 @@ relational_expr: + shift_expr + | relational_expr '<' shift_expr + { +- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, @2); ++ $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); + } + | relational_expr '>' shift_expr + { +- $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, @2); ++ $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); + } + | relational_expr OP_LE shift_expr + { +- $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, @2); ++ $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); + } + | relational_expr OP_GE shift_expr + { +- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, @2); ++ $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); + } + + equality_expr: + relational_expr + | equality_expr OP_EQ relational_expr + { +- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, @2); ++ $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); + } + | equality_expr OP_NE relational_expr + { +- $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, @2); ++ $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); + } + + bitand_expr: +@@ -5652,7 +6123,26 @@ conditional_expr: + logicor_expr + | logicor_expr '?' 
expr ':' assignment_expr + { +- hlsl_fixme(ctx, &@$, "Ternary operator."); ++ struct hlsl_ir_node *cond = node_from_list($1), *first = node_from_list($3), *second = node_from_list($5); ++ struct hlsl_type *common_type; ++ ++ list_move_tail($1, $3); ++ list_move_tail($1, $5); ++ vkd3d_free($3); ++ vkd3d_free($5); ++ ++ if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) ++ YYABORT; ++ ++ if (!(first = add_implicit_conversion(ctx, $1, first, common_type, &@3))) ++ YYABORT; ++ ++ if (!(second = add_implicit_conversion(ctx, $1, second, common_type, &@5))) ++ YYABORT; ++ ++ if (!hlsl_add_conditional(ctx, $1, cond, first, second)) ++ YYABORT; ++ $$ = $1; + } + + assignment_expr: +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index ab59875738c..bbb5223b1ec 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -27,11 +27,11 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str + enum hlsl_regset regset, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *idx_offset = NULL; +- struct hlsl_ir_constant *c; ++ struct hlsl_ir_node *c; + +- list_init(&block->instrs); ++ hlsl_block_init(block); + +- switch (type->type) ++ switch (type->class) + { + case HLSL_CLASS_VECTOR: + idx_offset = idx; +@@ -41,11 +41,11 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str + { + if (!(c = hlsl_new_uint_constant(ctx, 4, loc))) + return NULL; +- list_add_tail(&block->instrs, &c->node.entry); ++ hlsl_block_add_instr(block, c); + +- if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, &c->node, idx))) ++ if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, c, idx))) + return NULL; +- list_add_tail(&block->instrs, &idx_offset->entry); ++ hlsl_block_add_instr(block, idx_offset); + + break; + } +@@ -56,25 +56,25 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str + + if (!(c = hlsl_new_uint_constant(ctx, size, loc))) + return NULL; +- list_add_tail(&block->instrs, &c->node.entry); ++ hlsl_block_add_instr(block, c); + +- if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, &c->node, idx))) ++ if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, c, idx))) + return NULL; +- list_add_tail(&block->instrs, &idx_offset->entry); ++ hlsl_block_add_instr(block, idx_offset); + + break; + } + + case HLSL_CLASS_STRUCT: + { +- unsigned int field_idx = hlsl_ir_constant(idx)->value[0].u; ++ unsigned int field_idx = hlsl_ir_constant(idx)->value.u[0].u; + struct hlsl_struct_field *field = &type->e.record.fields[field_idx]; + + if (!(c = hlsl_new_uint_constant(ctx, field->reg_offset[regset], loc))) + return NULL; +- list_add_tail(&block->instrs, &c->node.entry); ++ hlsl_block_add_instr(block, c); + +- idx_offset = &c->node; ++ idx_offset = c; + + break; + } +@@ -87,7 +87,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str + { + if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, offset, idx_offset))) + return NULL; +- list_add_tail(&block->instrs, &idx_offset->entry); ++ hlsl_block_add_instr(block, idx_offset); + } + + return idx_offset; +@@ -101,7 +101,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st + struct hlsl_type *type; + unsigned int i; + +- list_init(&block->instrs); ++ hlsl_block_init(block); + + assert(deref->var); + type = deref->var->data_type; +@@ -114,7 +114,7 @@ static struct 
hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st + deref->offset_regset, loc))) + return NULL; + +- list_move_tail(&block->instrs, &idx_block.instrs); ++ hlsl_block_add_block(block, &idx_block); + + type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); + } +@@ -140,7 +140,7 @@ static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der + + /* Instructions that directly refer to structs or arrays (instead of single-register components) + * are removed later by dce. So it is not a problem to just cleanup their derefs. */ +- if (type->type == HLSL_CLASS_STRUCT || type->type == HLSL_CLASS_ARRAY) ++ if (type->class == HLSL_CLASS_STRUCT || type->class == HLSL_CLASS_ARRAY) + { + hlsl_cleanup_deref(deref); + return; +@@ -191,14 +191,14 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru + { + struct vkd3d_string_buffer *name; + struct hlsl_ir_var *uniform; +- struct hlsl_ir_store *store; ++ struct hlsl_ir_node *store; + struct hlsl_ir_load *load; + + /* Use the synthetic name for the temp, rather than the uniform, so that we + * can write the uniform name into the shader reflection data. */ + + if (!(uniform = hlsl_new_var(ctx, temp->name, temp->data_type, +- temp->loc, NULL, temp->storage_modifiers, &temp->reg_reservation))) ++ &temp->loc, NULL, temp->storage_modifiers, &temp->reg_reservation))) + return; + list_add_before(&temp->scope_entry, &uniform->scope_entry); + list_add_tail(&ctx->extern_vars, &uniform->extern_entry); +@@ -212,17 +212,53 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru + temp->name = hlsl_strdup(ctx, name->buffer); + hlsl_release_string_buffer(ctx, name); + +- if (!(load = hlsl_new_var_load(ctx, uniform, temp->loc))) ++ if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) + return; + list_add_head(instrs, &load->node.entry); + + if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) + return; +- list_add_after(&load->node.entry, &store->node.entry); ++ list_add_after(&load->node.entry, &store->entry); ++} ++ ++static void validate_field_semantic(struct hlsl_ctx *ctx, struct hlsl_struct_field *field) ++{ ++ if (!field->semantic.name && hlsl_get_multiarray_element_type(field->type)->class <= HLSL_CLASS_LAST_NUMERIC ++ && !field->semantic.reported_missing) ++ { ++ hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, ++ "Field '%s' is missing a semantic.", field->name); ++ field->semantic.reported_missing = true; ++ } ++} ++ ++static enum hlsl_base_type base_type_get_semantic_equivalent(enum hlsl_base_type base) ++{ ++ if (base == HLSL_TYPE_BOOL) ++ return HLSL_TYPE_UINT; ++ if (base == HLSL_TYPE_INT) ++ return HLSL_TYPE_UINT; ++ if (base == HLSL_TYPE_HALF) ++ return HLSL_TYPE_FLOAT; ++ return base; ++} ++ ++static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hlsl_type *type1, ++ const struct hlsl_type *type2) ++{ ++ if (ctx->profile->major_version < 4) ++ return true; ++ ++ if (type1->dimx != type2->dimx) ++ return false; ++ ++ return base_type_get_semantic_equivalent(type1->base_type) ++ == base_type_get_semantic_equivalent(type2->base_type); + } + + static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, +- struct hlsl_type *type, unsigned int modifiers, const struct hlsl_semantic *semantic, bool output) ++ struct hlsl_type *type, unsigned int modifiers, struct hlsl_semantic *semantic, ++ uint32_t index, bool output, const struct vkd3d_shader_location *loc) 
+ { + struct hlsl_semantic new_semantic; + struct vkd3d_string_buffer *name; +@@ -230,15 +266,50 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + + if (!(name = hlsl_get_string_buffer(ctx))) + return NULL; +- vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? "output" : "input", semantic->name, semantic->index); ++ vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? "output" : "input", semantic->name, index); ++ ++ LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (!ascii_strcasecmp(ext_var->name, name->buffer)) ++ { ++ if (output) ++ { ++ if (index >= semantic->reported_duplicated_output_next_index) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, ++ "Output semantic \"%s%u\" is used multiple times.", semantic->name, index); ++ hlsl_note(ctx, &ext_var->loc, HLSL_LEVEL_ERROR, ++ "First use of \"%s%u\" is here.", semantic->name, index); ++ semantic->reported_duplicated_output_next_index = index + 1; ++ } ++ } ++ else ++ { ++ if (index >= semantic->reported_duplicated_input_incompatible_next_index ++ && !types_are_semantic_equivalent(ctx, ext_var->data_type, type)) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, ++ "Input semantic \"%s%u\" is used multiple times with incompatible types.", ++ semantic->name, index); ++ hlsl_note(ctx, &ext_var->loc, HLSL_LEVEL_ERROR, ++ "First declaration of \"%s%u\" is here.", semantic->name, index); ++ semantic->reported_duplicated_input_incompatible_next_index = index + 1; ++ } ++ } ++ ++ hlsl_release_string_buffer(ctx, name); ++ return ext_var; ++ } ++ } ++ + if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) + { + hlsl_release_string_buffer(ctx, name); + return NULL; + } +- new_semantic.index = semantic->index; +- if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), +- type, var->loc, &new_semantic, modifiers, NULL))) ++ new_semantic.index = index; ++ if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), type, loc, &new_semantic, ++ modifiers, NULL))) + { + hlsl_release_string_buffer(ctx, name); + hlsl_cleanup_semantic(&new_semantic); +@@ -257,80 +328,116 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + } + + static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, +- unsigned int modifiers, const struct hlsl_semantic *semantic) ++ unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { +- struct hlsl_type *type = lhs->node.data_type, *vector_type; ++ struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; ++ struct vkd3d_shader_location *loc = &lhs->node.loc; + struct hlsl_ir_var *var = lhs->src.var; ++ struct hlsl_ir_node *c; + unsigned int i; + +- vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); ++ if (type->class > HLSL_CLASS_LAST_NUMERIC) ++ { ++ struct vkd3d_string_buffer *string; ++ if (!(string = hlsl_type_to_string(ctx, type))) ++ return; ++ hlsl_fixme(ctx, &var->loc, "Input semantics for type %s.", string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ } ++ if (!semantic->name) ++ return; ++ ++ vector_type_src = hlsl_get_vector_type(ctx, type->base_type, ++ (ctx->profile->major_version < 4) ? 
4 : hlsl_type_minor_size(type)); ++ vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + + for (i = 0; i < hlsl_type_major_size(type); ++i) + { +- struct hlsl_semantic semantic_copy = *semantic; +- struct hlsl_ir_store *store; +- struct hlsl_ir_constant *c; ++ struct hlsl_ir_node *store, *cast; + struct hlsl_ir_var *input; + struct hlsl_ir_load *load; + +- semantic_copy.index = semantic->index + i; +- +- if (!(input = add_semantic_var(ctx, var, vector_type, modifiers, &semantic_copy, false))) ++ if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic, ++ semantic_index + i, false, loc))) + return; + +- if (!(load = hlsl_new_var_load(ctx, input, var->loc))) ++ if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) + return; + list_add_after(&lhs->node.entry, &load->node.entry); + +- if (type->type == HLSL_CLASS_MATRIX) ++ if (!(cast = hlsl_new_cast(ctx, &load->node, vector_type_dst, &var->loc))) ++ return; ++ list_add_after(&load->node.entry, &cast->entry); ++ ++ if (type->class == HLSL_CLASS_MATRIX) + { + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; +- list_add_after(&load->node.entry, &c->node.entry); ++ list_add_after(&cast->entry, &c->entry); + +- if (!(store = hlsl_new_store_index(ctx, &lhs->src, &c->node, &load->node, 0, &var->loc))) ++ if (!(store = hlsl_new_store_index(ctx, &lhs->src, c, cast, 0, &var->loc))) + return; +- list_add_after(&c->node.entry, &store->node.entry); ++ list_add_after(&c->entry, &store->entry); + } + else + { + assert(i == 0); + +- if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, &load->node, 0, &var->loc))) ++ if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, cast, 0, &var->loc))) + return; +- list_add_after(&load->node.entry, &store->node.entry); ++ list_add_after(&cast->entry, &store->entry); + } + } + } + +-static void prepend_input_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs) ++static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, ++ unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { ++ struct vkd3d_shader_location *loc = &lhs->node.loc; + struct hlsl_type *type = lhs->node.data_type; + struct hlsl_ir_var *var = lhs->src.var; +- size_t i; ++ struct hlsl_ir_node *c; ++ unsigned int i; + +- for (i = 0; i < type->e.record.field_count; ++i) ++ if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT) + { +- const struct hlsl_struct_field *field = &type->e.record.fields[i]; +- struct hlsl_ir_load *field_load; +- struct hlsl_ir_constant *c; ++ struct hlsl_ir_load *element_load; ++ struct hlsl_struct_field *field; ++ uint32_t elem_semantic_index; + +- if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) +- return; +- list_add_after(&lhs->node.entry, &c->node.entry); ++ for (i = 0; i < hlsl_type_element_count(type); ++i) ++ { ++ if (type->class == HLSL_CLASS_ARRAY) ++ { ++ elem_semantic_index = semantic_index ++ + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; ++ } ++ else ++ { ++ field = &type->e.record.fields[i]; ++ if (hlsl_type_is_resource(field->type)) ++ continue; ++ validate_field_semantic(ctx, field); ++ semantic = &field->semantic; ++ elem_semantic_index = semantic->index; ++ loc = &field->loc; ++ } + +- /* This redundant load is expected to be deleted later by DCE. 
*/ +- if (!(field_load = hlsl_new_load_index(ctx, &lhs->src, &c->node, &var->loc))) +- return; +- list_add_after(&c->node.entry, &field_load->node.entry); ++ if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) ++ return; ++ list_add_after(&lhs->node.entry, &c->entry); + +- if (field->type->type == HLSL_CLASS_STRUCT) +- prepend_input_struct_copy(ctx, instrs, field_load); +- else if (field->semantic.name) +- prepend_input_copy(ctx, instrs, field_load, field->storage_modifiers, &field->semantic); +- else +- hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, +- "Field '%s' is missing a semantic.", field->name); ++ /* This redundant load is expected to be deleted later by DCE. */ ++ if (!(element_load = hlsl_new_load_index(ctx, &lhs->src, c, loc))) ++ return; ++ list_add_after(&c->entry, &element_load->node.entry); ++ ++ prepend_input_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); ++ } ++ } ++ else ++ { ++ prepend_input_copy(ctx, instrs, lhs, modifiers, semantic, semantic_index); + } + } + +@@ -341,45 +448,51 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, st + struct hlsl_ir_load *load; + + /* This redundant load is expected to be deleted later by DCE. */ +- if (!(load = hlsl_new_var_load(ctx, var, var->loc))) ++ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) + return; + list_add_head(instrs, &load->node.entry); + +- if (var->data_type->type == HLSL_CLASS_STRUCT) +- prepend_input_struct_copy(ctx, instrs, load); +- else if (var->semantic.name) +- prepend_input_copy(ctx, instrs, load, var->storage_modifiers, &var->semantic); ++ prepend_input_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); + } + + static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, +- unsigned int modifiers, const struct hlsl_semantic *semantic) ++ unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct hlsl_type *type = rhs->node.data_type, *vector_type; ++ struct vkd3d_shader_location *loc = &rhs->node.loc; + struct hlsl_ir_var *var = rhs->src.var; ++ struct hlsl_ir_node *c; + unsigned int i; + ++ if (type->class > HLSL_CLASS_LAST_NUMERIC) ++ { ++ struct vkd3d_string_buffer *string; ++ if (!(string = hlsl_type_to_string(ctx, type))) ++ return; ++ hlsl_fixme(ctx, &var->loc, "Output semantics for type %s.", string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ } ++ if (!semantic->name) ++ return; ++ + vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + + for (i = 0; i < hlsl_type_major_size(type); ++i) + { +- struct hlsl_semantic semantic_copy = *semantic; +- struct hlsl_ir_store *store; +- struct hlsl_ir_constant *c; ++ struct hlsl_ir_node *store; + struct hlsl_ir_var *output; + struct hlsl_ir_load *load; + +- semantic_copy.index = semantic->index + i; +- +- if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, &semantic_copy, true))) ++ if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc))) + return; + +- if (type->type == HLSL_CLASS_MATRIX) ++ if (type->class == HLSL_CLASS_MATRIX) + { + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; +- list_add_tail(instrs, &c->node.entry); ++ list_add_tail(instrs, &c->entry); + +- if (!(load = hlsl_new_load_index(ctx, &rhs->src, &c->node, &var->loc))) ++ if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) + return; + 
list_add_tail(instrs, &load->node.entry); + } +@@ -394,38 +507,57 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct + + if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) + return; +- list_add_tail(instrs, &store->node.entry); ++ list_add_tail(instrs, &store->entry); + } + } + +-static void append_output_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs) ++static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, ++ unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { ++ struct vkd3d_shader_location *loc = &rhs->node.loc; + struct hlsl_type *type = rhs->node.data_type; + struct hlsl_ir_var *var = rhs->src.var; +- size_t i; ++ struct hlsl_ir_node *c; ++ unsigned int i; + +- for (i = 0; i < type->e.record.field_count; ++i) ++ if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT) + { +- const struct hlsl_struct_field *field = &type->e.record.fields[i]; +- struct hlsl_ir_load *field_load; +- struct hlsl_ir_constant *c; ++ struct hlsl_ir_load *element_load; ++ struct hlsl_struct_field *field; ++ uint32_t elem_semantic_index; + +- if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) +- return; +- list_add_tail(instrs, &c->node.entry); ++ for (i = 0; i < hlsl_type_element_count(type); ++i) ++ { ++ if (type->class == HLSL_CLASS_ARRAY) ++ { ++ elem_semantic_index = semantic_index ++ + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; ++ } ++ else ++ { ++ field = &type->e.record.fields[i]; ++ if (hlsl_type_is_resource(field->type)) ++ continue; ++ validate_field_semantic(ctx, field); ++ semantic = &field->semantic; ++ elem_semantic_index = semantic->index; ++ loc = &field->loc; ++ } + +- /* This redundant load is expected to be deleted later by DCE. */ +- if (!(field_load = hlsl_new_load_index(ctx, &rhs->src, &c->node, &var->loc))) +- return; +- list_add_tail(instrs, &field_load->node.entry); ++ if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) ++ return; ++ list_add_tail(instrs, &c->entry); + +- if (field->type->type == HLSL_CLASS_STRUCT) +- append_output_struct_copy(ctx, instrs, field_load); +- else if (field->semantic.name) +- append_output_copy(ctx, instrs, field_load, field->storage_modifiers, &field->semantic); +- else +- hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, +- "Field '%s' is missing a semantic.", field->name); ++ if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) ++ return; ++ list_add_tail(instrs, &element_load->node.entry); ++ ++ append_output_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); ++ } ++ } ++ else ++ { ++ append_output_copy(ctx, instrs, rhs, modifiers, semantic, semantic_index); + } + } + +@@ -437,17 +569,14 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, st + struct hlsl_ir_load *load; + + /* This redundant load is expected to be deleted later by DCE. 
*/ +- if (!(load = hlsl_new_var_load(ctx, var, var->loc))) ++ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) + return; + list_add_tail(instrs, &load->node.entry); + +- if (var->data_type->type == HLSL_CLASS_STRUCT) +- append_output_struct_copy(ctx, instrs, load); +- else if (var->semantic.name) +- append_output_copy(ctx, instrs, load, var->storage_modifiers, &var->semantic); ++ append_output_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); + } + +-static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), ++bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), + struct hlsl_block *block, void *context) + { + struct hlsl_ir_node *instr, *next; +@@ -459,11 +588,11 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + +- progress |= transform_ir(ctx, func, &iff->then_instrs, context); +- progress |= transform_ir(ctx, func, &iff->else_instrs, context); ++ progress |= hlsl_transform_ir(ctx, func, &iff->then_block, context); ++ progress |= hlsl_transform_ir(ctx, func, &iff->else_block, context); + } + else if (instr->type == HLSL_IR_LOOP) +- progress |= transform_ir(ctx, func, &hlsl_ir_loop(instr)->body, context); ++ progress |= hlsl_transform_ir(ctx, func, &hlsl_ir_loop(instr)->body, context); + + progress |= func(ctx, instr, context); + } +@@ -506,7 +635,7 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + return false; + call_ctx->backtrace[call_ctx->count++] = decl; + +- transform_ir(ctx, find_recursive_calls, &decl->body, call_ctx); ++ hlsl_transform_ir(ctx, find_recursive_calls, &decl->body, call_ctx); + + --call_ctx->count; + +@@ -516,21 +645,23 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + static void insert_early_return_break(struct hlsl_ctx *ctx, + struct hlsl_ir_function_decl *func, struct hlsl_ir_node *cf_instr) + { +- struct hlsl_ir_jump *jump; ++ struct hlsl_ir_node *iff, *jump; ++ struct hlsl_block then_block; + struct hlsl_ir_load *load; +- struct hlsl_ir_if *iff; + +- if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) ++ hlsl_block_init(&then_block); ++ ++ if (!(load = hlsl_new_var_load(ctx, func->early_return_var, &cf_instr->loc))) + return; + list_add_after(&cf_instr->entry, &load->node.entry); + +- if (!(iff = hlsl_new_if(ctx, &load->node, cf_instr->loc))) ++ if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) + return; +- list_add_after(&load->node.entry, &iff->node.entry); ++ hlsl_block_add_instr(&then_block, jump); + +- if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, cf_instr->loc))) ++ if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, &cf_instr->loc))) + return; +- list_add_tail(&iff->then_instrs.instrs, &jump->node.entry); ++ list_add_after(&load->node.entry, &iff->entry); + } + + /* Remove HLSL_IR_JUMP_RETURN calls by altering subsequent control flow. */ +@@ -566,7 +697,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun + * the CF instruction, shove it into an if block, and then lower that if + * block. 
+ * +- * (We could return a "did we make progress" boolean like transform_ir() ++ * (We could return a "did we make progress" boolean like hlsl_transform_ir() + * and run this pass multiple times, but we already know the only block + * that still needs to be addressed, so there's not much point.) + * +@@ -591,8 +722,8 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + +- has_early_return |= lower_return(ctx, func, &iff->then_instrs, in_loop); +- has_early_return |= lower_return(ctx, func, &iff->else_instrs, in_loop); ++ has_early_return |= lower_return(ctx, func, &iff->then_block, in_loop); ++ has_early_return |= lower_return(ctx, func, &iff->else_block, in_loop); + + if (has_early_return) + { +@@ -628,18 +759,17 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun + else if (instr->type == HLSL_IR_JUMP) + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); +- struct hlsl_ir_constant *constant; +- struct hlsl_ir_store *store; ++ struct hlsl_ir_node *constant, *store; + + if (jump->type == HLSL_IR_JUMP_RETURN) + { + if (!(constant = hlsl_new_bool_constant(ctx, true, &jump->node.loc))) + return false; +- list_add_before(&jump->node.entry, &constant->node.entry); ++ list_add_before(&jump->node.entry, &constant->entry); + +- if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, &constant->node))) ++ if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, constant))) + return false; +- list_add_after(&constant->node.entry, &store->node.entry); ++ list_add_after(&constant->entry, &store->entry); + + has_early_return = true; + if (in_loop) +@@ -675,9 +805,9 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun + else if (cf_instr) + { + struct list *tail = list_tail(&block->instrs); ++ struct hlsl_ir_node *not, *iff; ++ struct hlsl_block then_block; + struct hlsl_ir_load *load; +- struct hlsl_ir_node *not; +- struct hlsl_ir_if *iff; + + /* If we're in a loop, we should have used "break" instead. 
*/ + assert(!in_loop); +@@ -685,21 +815,21 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun + if (tail == &cf_instr->entry) + return has_early_return; + +- if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) +- return false; +- list_add_tail(&block->instrs, &load->node.entry); ++ hlsl_block_init(&then_block); ++ list_move_slice_tail(&then_block.instrs, list_next(&block->instrs, &cf_instr->entry), tail); ++ lower_return(ctx, func, &then_block, in_loop); + +- if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, cf_instr->loc))) ++ if (!(load = hlsl_new_var_load(ctx, func->early_return_var, &cf_instr->loc))) + return false; +- list_add_tail(&block->instrs, ¬->entry); ++ hlsl_block_add_instr(block, &load->node); + +- if (!(iff = hlsl_new_if(ctx, not, cf_instr->loc))) ++ if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, &cf_instr->loc))) + return false; +- list_add_tail(&block->instrs, &iff->node.entry); +- +- list_move_slice_tail(&iff->then_instrs.instrs, list_next(&block->instrs, &cf_instr->entry), tail); ++ hlsl_block_add_instr(block, not); + +- lower_return(ctx, func, &iff->then_instrs, in_loop); ++ if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &cf_instr->loc))) ++ return false; ++ list_add_tail(&block->instrs, &iff->entry); + } + + return has_early_return; +@@ -721,7 +851,6 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * + hlsl_error(ctx, &call->node.loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "Function \"%s\" is not defined.", decl->func->name); + +- list_init(&block.instrs); + if (!hlsl_clone_block(ctx, &block, &decl->body)) + return false; + list_move_before(&call->node.entry, &block.instrs); +@@ -731,6 +860,142 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * + return true; + } + ++static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct hlsl_ir_node *index, ++ const struct vkd3d_shader_location *loc) ++{ ++ unsigned int dim_count = index->data_type->dimx; ++ struct hlsl_ir_node *store, *zero; ++ struct hlsl_ir_load *coords_load; ++ struct hlsl_deref coords_deref; ++ struct hlsl_ir_var *coords; ++ ++ assert(dim_count < 4); ++ ++ if (!(coords = hlsl_new_synthetic_var(ctx, "coords", ++ hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) ++ return NULL; ++ ++ hlsl_init_simple_deref_from_var(&coords_deref, coords); ++ if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, index, (1u << dim_count) - 1, loc))) ++ return NULL; ++ list_add_after(&index->entry, &store->entry); ++ ++ if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) ++ return NULL; ++ list_add_after(&store->entry, &zero->entry); ++ ++ if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, zero, 1u << dim_count, loc))) ++ return NULL; ++ list_add_after(&zero->entry, &store->entry); ++ ++ if (!(coords_load = hlsl_new_var_load(ctx, coords, loc))) ++ return NULL; ++ list_add_after(&store->entry, &coords_load->node.entry); ++ ++ return &coords_load->node; ++} ++ ++/* hlsl_ir_index nodes are a parse-time construct used to represent array indexing and struct ++ * record access before knowing if they will be used in the lhs of an assignment --in which case ++ * they are lowered into a deref-- or as the load of an element within a larger value. 
++ * For the latter case, this pass takes care of lowering hlsl_ir_indexes into individual ++ * hlsl_ir_loads, or individual hlsl_ir_resource_loads, in case the indexing is a ++ * resource access. */ ++static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_node *val, *store; ++ struct hlsl_deref var_deref; ++ struct hlsl_ir_index *index; ++ struct hlsl_ir_load *load; ++ struct hlsl_ir_var *var; ++ ++ if (instr->type != HLSL_IR_INDEX) ++ return false; ++ index = hlsl_ir_index(instr); ++ val = index->val.node; ++ ++ if (hlsl_index_is_resource_access(index)) ++ { ++ unsigned int dim_count = hlsl_sampler_dim_count(val->data_type->sampler_dim); ++ struct hlsl_ir_node *coords = index->idx.node; ++ struct hlsl_resource_load_params params = {0}; ++ struct hlsl_ir_node *load; ++ ++ assert(coords->data_type->class == HLSL_CLASS_VECTOR); ++ assert(coords->data_type->base_type == HLSL_TYPE_UINT); ++ assert(coords->data_type->dimx == dim_count); ++ ++ if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) ++ return false; ++ ++ params.type = HLSL_RESOURCE_LOAD; ++ params.resource = val; ++ params.coords = coords; ++ params.format = val->data_type->e.resource_format; ++ ++ if (!(load = hlsl_new_resource_load(ctx, ¶ms, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &load->entry); ++ hlsl_replace_node(instr, load); ++ return true; ++ } ++ ++ if (!(var = hlsl_new_synthetic_var(ctx, "index-val", val->data_type, &instr->loc))) ++ return false; ++ hlsl_init_simple_deref_from_var(&var_deref, var); ++ ++ if (!(store = hlsl_new_simple_store(ctx, var, val))) ++ return false; ++ list_add_before(&instr->entry, &store->entry); ++ ++ if (hlsl_index_is_noncontiguous(index)) ++ { ++ struct hlsl_ir_node *mat = index->val.node; ++ struct hlsl_deref row_deref; ++ unsigned int i; ++ ++ assert(!hlsl_type_is_row_major(mat->data_type)); ++ ++ if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc))) ++ return false; ++ hlsl_init_simple_deref_from_var(&row_deref, var); ++ ++ for (i = 0; i < mat->data_type->dimx; ++i) ++ { ++ struct hlsl_ir_node *c; ++ ++ if (!(c = hlsl_new_uint_constant(ctx, i, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &c->entry); ++ ++ if (!(load = hlsl_new_load_index(ctx, &var_deref, c, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &load->node.entry); ++ ++ if (!(load = hlsl_new_load_index(ctx, &load->src, index->idx.node, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &load->node.entry); ++ ++ if (!(store = hlsl_new_store_index(ctx, &row_deref, c, &load->node, 0, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &store->entry); ++ } ++ ++ if (!(load = hlsl_new_var_load(ctx, var, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &load->node.entry); ++ hlsl_replace_node(instr, &load->node); ++ } ++ else ++ { ++ if (!(load = hlsl_new_load_index(ctx, &var_deref, index->idx.node, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &load->node.entry); ++ hlsl_replace_node(instr, &load->node); ++ } ++ return true; ++} ++ + /* Lower casts from vec1 to vecN to swizzles. 
*/ + static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { +@@ -746,26 +1011,24 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v + src_type = cast->operands[0].node->data_type; + dst_type = cast->node.data_type; + +- if (src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR && src_type->dimx == 1) ++ if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->dimx == 1) + { +- struct hlsl_ir_node *replacement; +- struct hlsl_ir_swizzle *swizzle; +- struct hlsl_ir_expr *new_cast; ++ struct hlsl_ir_node *replacement, *new_cast, *swizzle; + + dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->base_type); + /* We need to preserve the cast since it might be doing more than just + * turning the scalar into a vector. */ + if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_scalar_type, &cast->node.loc))) + return false; +- list_add_after(&cast->node.entry, &new_cast->node.entry); +- replacement = &new_cast->node; ++ list_add_after(&cast->node.entry, &new_cast->entry); ++ replacement = new_cast; + + if (dst_type->dimx != 1) + { + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), dst_type->dimx, replacement, &cast->node.loc))) + return false; +- list_add_after(&new_cast->node.entry, &swizzle->node.entry); +- replacement = &swizzle->node; ++ list_add_after(&new_cast->entry, &swizzle->entry); ++ replacement = swizzle; + } + + hlsl_replace_node(&cast->node, replacement); +@@ -949,9 +1212,9 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ + path_node = deref->path[depth].node; + subtype = hlsl_get_element_type_from_path_index(ctx, type, path_node); + +- if (type->type == HLSL_CLASS_STRUCT) ++ if (type->class == HLSL_CLASS_STRUCT) + { +- unsigned int idx = hlsl_ir_constant(path_node)->value[0].u; ++ unsigned int idx = hlsl_ir_constant(path_node)->value.u[0].u; + + for (i = 0; i < idx; ++i) + comp_start += hlsl_type_component_count(type->e.record.fields[i].type); +@@ -966,7 +1229,7 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ + if (path_node->type == HLSL_IR_CONSTANT) + { + copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, +- depth + 1, hlsl_ir_constant(path_node)->value[0].u * subtype_comp_count, writemask); ++ depth + 1, hlsl_ir_constant(path_node)->value.u[0].u * subtype_comp_count, writemask); + } + else + { +@@ -1041,14 +1304,14 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, + var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), + new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); + +- if (instr->data_type->type != HLSL_CLASS_OBJECT) ++ if (instr->data_type->class != HLSL_CLASS_OBJECT) + { +- struct hlsl_ir_swizzle *swizzle_node; ++ struct hlsl_ir_node *swizzle_node; + + if (!(swizzle_node = hlsl_new_swizzle(ctx, ret_swizzle, instr_component_count, new_instr, &instr->loc))) + return false; +- list_add_before(&instr->entry, &swizzle_node->node.entry); +- new_instr = &swizzle_node->node; ++ list_add_before(&instr->entry, &swizzle_node->entry); ++ new_instr = swizzle_node; + } + + hlsl_replace_node(instr, new_instr); +@@ -1061,7 +1324,7 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, + { + const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type); + const struct hlsl_ir_var *var = 
deref->var; +- union hlsl_constant_value values[4] = {0}; ++ struct hlsl_constant_value values = {0}; + struct hlsl_ir_constant *cons; + unsigned int start, count, i; + +@@ -1076,15 +1339,12 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, + || value->node->type != HLSL_IR_CONSTANT) + return false; + +- values[i] = hlsl_ir_constant(value->node)->value[value->component]; ++ values.u[i] = hlsl_ir_constant(value->node)->value.u[value->component]; + } + + if (!(cons = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) + return false; +- cons->value[0] = values[0]; +- cons->value[1] = values[1]; +- cons->value[2] = values[2]; +- cons->value[3] = values[3]; ++ cons->value = values; + list_add_before(&instr->entry, &cons->node.entry); + + TRACE("Load from %s[%u-%u]%s turned into a constant %p.\n", +@@ -1099,7 +1359,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, + { + struct hlsl_type *type = load->node.data_type; + +- switch (type->type) ++ switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: +@@ -1220,7 +1480,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s + { + unsigned int writemask = store->writemask; + +- if (store->rhs.node->data_type->type == HLSL_CLASS_OBJECT) ++ if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT) + writemask = VKD3DSP_WRITEMASK_0; + copy_propagation_set_value(var_def, start, writemask, store->rhs.node); + } +@@ -1270,8 +1530,8 @@ static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + +- copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); +- copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); ++ copy_propagation_invalidate_from_block(ctx, state, &iff->then_block); ++ copy_propagation_invalidate_from_block(ctx, state, &iff->else_block); + + break; + } +@@ -1301,19 +1561,19 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if + bool progress = false; + + copy_propagation_state_init(ctx, &inner_state, state); +- progress |= copy_propagation_transform_block(ctx, &iff->then_instrs, &inner_state); ++ progress |= copy_propagation_transform_block(ctx, &iff->then_block, &inner_state); + copy_propagation_state_destroy(&inner_state); + + copy_propagation_state_init(ctx, &inner_state, state); +- progress |= copy_propagation_transform_block(ctx, &iff->else_instrs, &inner_state); ++ progress |= copy_propagation_transform_block(ctx, &iff->else_block, &inner_state); + copy_propagation_state_destroy(&inner_state); + + /* Ideally we'd invalidate the outer state looking at what was + * touched in the two inner states, but this doesn't work for + * loops (because we need to know what is invalidated in advance), + * so we need copy_propagation_invalidate_from_block() anyway. 
*/ +- copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); +- copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); ++ copy_propagation_invalidate_from_block(ctx, state, &iff->then_block); ++ copy_propagation_invalidate_from_block(ctx, state, &iff->else_block); + + return progress; + } +@@ -1471,7 +1731,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ + + static bool is_vec1(const struct hlsl_type *type) + { +- return (type->type == HLSL_CLASS_SCALAR) || (type->type == HLSL_CLASS_VECTOR && type->dimx == 1); ++ return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->dimx == 1); + } + + static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -1505,21 +1765,20 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + static bool split_copy(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, + const struct hlsl_ir_load *load, const unsigned int idx, struct hlsl_type *type) + { +- struct hlsl_ir_store *split_store; ++ struct hlsl_ir_node *split_store, *c; + struct hlsl_ir_load *split_load; +- struct hlsl_ir_constant *c; + + if (!(c = hlsl_new_uint_constant(ctx, idx, &store->node.loc))) + return false; +- list_add_before(&store->node.entry, &c->node.entry); ++ list_add_before(&store->node.entry, &c->entry); + +- if (!(split_load = hlsl_new_load_index(ctx, &load->src, &c->node, &store->node.loc))) ++ if (!(split_load = hlsl_new_load_index(ctx, &load->src, c, &store->node.loc))) + return false; + list_add_before(&store->node.entry, &split_load->node.entry); + +- if (!(split_store = hlsl_new_store_index(ctx, &store->lhs, &c->node, &split_load->node, 0, &store->node.loc))) ++ if (!(split_store = hlsl_new_store_index(ctx, &store->lhs, c, &split_load->node, 0, &store->node.loc))) + return false; +- list_add_before(&store->node.entry, &split_store->node.entry); ++ list_add_before(&store->node.entry, &split_store->entry); + + return true; + } +@@ -1538,7 +1797,7 @@ static bool split_array_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + store = hlsl_ir_store(instr); + rhs = store->rhs.node; + type = rhs->data_type; +- if (type->type != HLSL_CLASS_ARRAY) ++ if (type->class != HLSL_CLASS_ARRAY) + return false; + element_type = type->e.array.type; + +@@ -1575,7 +1834,7 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + store = hlsl_ir_store(instr); + rhs = store->rhs.node; + type = rhs->data_type; +- if (type->type != HLSL_CLASS_STRUCT) ++ if (type->class != HLSL_CLASS_STRUCT) + return false; + + if (rhs->type != HLSL_IR_LOAD) +@@ -1614,7 +1873,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + store = hlsl_ir_store(instr); + rhs = store->rhs.node; + type = rhs->data_type; +- if (type->type != HLSL_CLASS_MATRIX) ++ if (type->class != HLSL_CLASS_MATRIX) + return false; + element_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + +@@ -1649,22 +1908,21 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins + src_type = cast->operands[0].node->data_type; + dst_type = cast->node.data_type; + +- if (src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) ++ if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) + { +- struct hlsl_ir_swizzle *swizzle; +- struct hlsl_ir_expr *new_cast; 
++ struct hlsl_ir_node *new_cast, *swizzle; + + dst_vector_type = hlsl_get_vector_type(ctx, dst_type->base_type, src_type->dimx); + /* We need to preserve the cast since it might be doing more than just + * narrowing the vector. */ + if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) + return false; +- list_add_after(&cast->node.entry, &new_cast->node.entry); +- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, &new_cast->node, &cast->node.loc))) ++ list_add_after(&cast->node.entry, &new_cast->entry); ++ if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, new_cast, &cast->node.loc))) + return false; +- list_add_after(&new_cast->node.entry, &swizzle->node.entry); ++ list_add_after(&new_cast->entry, &swizzle->entry); + +- hlsl_replace_node(&cast->node, &swizzle->node); ++ hlsl_replace_node(&cast->node, swizzle); + return true; + } + +@@ -1684,8 +1942,7 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + + if (next_instr->type == HLSL_IR_SWIZZLE) + { +- struct hlsl_ir_swizzle *new_swizzle; +- struct hlsl_ir_node *new_instr; ++ struct hlsl_ir_node *new_swizzle; + unsigned int combined_swizzle; + + combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle, +@@ -1695,9 +1952,8 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) + return false; + +- new_instr = &new_swizzle->node; +- list_add_before(&instr->entry, &new_instr->entry); +- hlsl_replace_node(instr, new_instr); ++ list_add_before(&instr->entry, &new_swizzle->entry); ++ hlsl_replace_node(instr, new_swizzle); + return true; + } + +@@ -1737,7 +1993,7 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi + if (expr->op != HLSL_OP2_DIV) + return false; + +- if (!(rcp = hlsl_new_unary_expr(ctx, HLSL_OP1_RCP, expr->operands[1].node, instr->loc))) ++ if (!(rcp = hlsl_new_unary_expr(ctx, HLSL_OP1_RCP, expr->operands[1].node, &instr->loc))) + return false; + list_add_before(&expr->node.entry, &rcp->entry); + expr->op = HLSL_OP2_MUL; +@@ -1758,7 +2014,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c + if (expr->op != HLSL_OP1_SQRT) + return false; + +- if (!(rsq = hlsl_new_unary_expr(ctx, HLSL_OP1_RSQ, expr->operands[0].node, instr->loc))) ++ if (!(rsq = hlsl_new_unary_expr(ctx, HLSL_OP1_RSQ, expr->operands[0].node, &instr->loc))) + return false; + list_add_before(&expr->node.entry, &rsq->entry); + expr->op = HLSL_OP1_RCP; +@@ -1770,9 +2026,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c + /* Lower DP2 to MUL + ADD */ + static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { +- struct hlsl_ir_node *arg1, *arg2, *mul, *replacement; +- struct hlsl_ir_swizzle *add_x, *add_y; +- struct hlsl_ir_constant *zero; ++ struct hlsl_ir_node *arg1, *arg2, *mul, *replacement, *zero, *add_x, *add_y; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) +@@ -1791,11 +2045,11 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co + + if (!(zero = hlsl_new_float_constant(ctx, 0.0f, &expr->node.loc))) + return false; +- list_add_before(&instr->entry, &zero->node.entry); ++ list_add_before(&instr->entry, &zero->entry); + + operands[0] = arg1; + operands[1] = arg2; +- operands[2] = 
&zero->node; ++ operands[2] = zero; + + if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_DP2ADD, operands, instr->data_type, &expr->node.loc))) + return false; +@@ -1808,13 +2062,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co + + if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), instr->data_type->dimx, mul, &expr->node.loc))) + return false; +- list_add_before(&instr->entry, &add_x->node.entry); ++ list_add_before(&instr->entry, &add_x->entry); + + if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), instr->data_type->dimx, mul, &expr->node.loc))) + return false; +- list_add_before(&instr->entry, &add_y->node.entry); ++ list_add_before(&instr->entry, &add_y->entry); + +- if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, &add_x->node, &add_y->node))) ++ if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, add_x, add_y))) + return false; + } + list_add_before(&instr->entry, &replacement->entry); +@@ -1836,7 +2090,7 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co + if (expr->op != HLSL_OP1_ABS) + return false; + +- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, instr->loc))) ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) + return false; + list_add_before(&instr->entry, &neg->entry); + +@@ -1848,6 +2102,51 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co + return true; + } + ++/* Lower ROUND using FRC, ROUND(x) -> ((x + 0.5) - FRC(x + 0.5)). */ ++static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_node *arg, *neg, *sum, *frc, *replacement; ++ struct hlsl_type *type = instr->data_type; ++ unsigned int i, component_count; ++ struct hlsl_ir_constant *half; ++ struct hlsl_ir_expr *expr; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ ++ expr = hlsl_ir_expr(instr); ++ arg = expr->operands[0].node; ++ if (expr->op != HLSL_OP1_ROUND) ++ return false; ++ ++ if (!(half = hlsl_new_constant(ctx, type, &expr->node.loc))) ++ return false; ++ ++ component_count = hlsl_type_component_count(type); ++ for (i = 0; i < component_count; ++i) ++ half->value.u[i].f = 0.5f; ++ list_add_before(&instr->entry, &half->node.entry); ++ ++ if (!(sum = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, &half->node))) ++ return false; ++ list_add_before(&instr->entry, &sum->entry); ++ ++ if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, sum, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &frc->entry); ++ ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, frc, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &neg->entry); ++ ++ if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, sum, neg))) ++ return false; ++ list_add_before(&instr->entry, &replacement->entry); ++ ++ hlsl_replace_node(instr, replacement); ++ return true; ++} ++ + static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + struct hlsl_type *type = instr->data_type, *arg_type; +@@ -1860,7 +2159,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + if (expr->op != HLSL_OP1_CAST) + return false; + arg_type = expr->operands[0].node->data_type; +- if (type->type > HLSL_CLASS_VECTOR || arg_type->type > HLSL_CLASS_VECTOR) ++ if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR) + return false; + if (type->base_type != HLSL_TYPE_BOOL) + return false; +@@ -1879,46 +2178,47 @@ 
static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + return true; + } + +-struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, ++struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) + { +- struct hlsl_ir_store *store; ++ struct hlsl_block then_block, else_block; ++ struct hlsl_ir_node *iff, *store; + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; +- struct hlsl_ir_if *iff; + + assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); + + if (!(var = hlsl_new_synthetic_var(ctx, "conditional", if_true->data_type, &condition->loc))) + return NULL; + +- if (!(iff = hlsl_new_if(ctx, condition, condition->loc))) +- return NULL; +- list_add_tail(instrs, &iff->node.entry); ++ hlsl_block_init(&then_block); ++ hlsl_block_init(&else_block); + + if (!(store = hlsl_new_simple_store(ctx, var, if_true))) + return NULL; +- list_add_tail(&iff->then_instrs.instrs, &store->node.entry); ++ hlsl_block_add_instr(&then_block, store); + + if (!(store = hlsl_new_simple_store(ctx, var, if_false))) + return NULL; +- list_add_tail(&iff->else_instrs.instrs, &store->node.entry); ++ hlsl_block_add_instr(&else_block, store); + +- if (!(load = hlsl_new_var_load(ctx, var, condition->loc))) ++ if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) ++ return NULL; ++ list_add_tail(instrs, &iff->entry); ++ ++ if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) + return NULL; + list_add_tail(instrs, &load->node.entry); + +- return load; ++ return &load->node; + } + + static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { +- struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg; ++ struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond; + struct hlsl_type *type = instr->data_type, *utype; +- struct hlsl_ir_expr *cast1, *cast2, *cast3; + struct hlsl_ir_constant *high_bit; + struct hlsl_ir_expr *expr; +- struct hlsl_ir_load *cond; + unsigned int i; + + if (instr->type != HLSL_IR_EXPR) +@@ -1928,11 +2228,11 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + arg2 = expr->operands[1].node; + if (expr->op != HLSL_OP2_DIV) + return false; +- if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) ++ if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) + return false; + if (type->base_type != HLSL_TYPE_INT) + return false; +- utype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_UINT, type->dimx, type->dimy); ++ utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); + + if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) + return false; +@@ -1941,56 +2241,54 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + if (!(high_bit = hlsl_new_constant(ctx, type, &instr->loc))) + return false; + for (i = 0; i < type->dimx; ++i) +- high_bit->value[i].u = 0x80000000; ++ high_bit->value.u[i].u = 0x80000000; + list_add_before(&instr->entry, &high_bit->node.entry); + + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, &high_bit->node))) + return false; + list_add_before(&instr->entry, &and->entry); + +- if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, instr->loc))) ++ if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, 
arg1, &instr->loc))) + return false; + list_add_before(&instr->entry, &abs1->entry); + + if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) + return false; +- list_add_before(&instr->entry, &cast1->node.entry); ++ list_add_before(&instr->entry, &cast1->entry); + +- if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, instr->loc))) ++ if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) + return false; + list_add_before(&instr->entry, &abs2->entry); + + if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) + return false; +- list_add_before(&instr->entry, &cast2->node.entry); ++ list_add_before(&instr->entry, &cast2->entry); + +- if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &cast1->node, &cast2->node))) ++ if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, cast1, cast2))) + return false; + list_add_before(&instr->entry, &div->entry); + + if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) + return false; +- list_add_before(&instr->entry, &cast3->node.entry); ++ list_add_before(&instr->entry, &cast3->entry); + +- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, &cast3->node, instr->loc))) ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) + return false; + list_add_before(&instr->entry, &neg->entry); + +- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, &cast3->node))) ++ if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) + return false; +- hlsl_replace_node(instr, &cond->node); ++ hlsl_replace_node(instr, cond); + + return true; + } + + static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { +- struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg; ++ struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond; + struct hlsl_type *type = instr->data_type, *utype; +- struct hlsl_ir_expr *cast1, *cast2, *cast3; + struct hlsl_ir_constant *high_bit; + struct hlsl_ir_expr *expr; +- struct hlsl_ir_load *cond; + unsigned int i; + + if (instr->type != HLSL_IR_EXPR) +@@ -2000,53 +2298,53 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + arg2 = expr->operands[1].node; + if (expr->op != HLSL_OP2_MOD) + return false; +- if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) ++ if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) + return false; + if (type->base_type != HLSL_TYPE_INT) + return false; +- utype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_UINT, type->dimx, type->dimy); ++ utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); + + if (!(high_bit = hlsl_new_constant(ctx, type, &instr->loc))) + return false; + for (i = 0; i < type->dimx; ++i) +- high_bit->value[i].u = 0x80000000; ++ high_bit->value.u[i].u = 0x80000000; + list_add_before(&instr->entry, &high_bit->node.entry); + + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, &high_bit->node))) + return false; + list_add_before(&instr->entry, &and->entry); + +- if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, instr->loc))) ++ if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) + return false; + list_add_before(&instr->entry, &abs1->entry); + + if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) + return false; +- list_add_before(&instr->entry, &cast1->node.entry); ++ list_add_before(&instr->entry, &cast1->entry); + +- if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, 
arg2, instr->loc))) ++ if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) + return false; + list_add_before(&instr->entry, &abs2->entry); + + if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) + return false; +- list_add_before(&instr->entry, &cast2->node.entry); ++ list_add_before(&instr->entry, &cast2->entry); + +- if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, &cast1->node, &cast2->node))) ++ if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, cast1, cast2))) + return false; + list_add_before(&instr->entry, &div->entry); + + if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) + return false; +- list_add_before(&instr->entry, &cast3->node.entry); ++ list_add_before(&instr->entry, &cast3->entry); + +- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, &cast3->node, instr->loc))) ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) + return false; + list_add_before(&instr->entry, &neg->entry); + +- if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, &cast3->node))) ++ if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) + return false; +- hlsl_replace_node(instr, &cond->node); ++ hlsl_replace_node(instr, cond); + + return true; + } +@@ -2063,14 +2361,14 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void + + if (expr->op != HLSL_OP1_ABS) + return false; +- if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) ++ if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) + return false; + if (type->base_type != HLSL_TYPE_INT) + return false; + + arg = expr->operands[0].node; + +- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, instr->loc))) ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) + return false; + list_add_before(&instr->entry, &neg->entry); + +@@ -2082,10 +2380,9 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void + + static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { +- struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc; ++ struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond; + struct hlsl_type *type = instr->data_type, *btype; + struct hlsl_ir_constant *one; +- struct hlsl_ir_load *cond; + struct hlsl_ir_expr *expr; + unsigned int i; + +@@ -2096,17 +2393,17 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + arg2 = expr->operands[1].node; + if (expr->op != HLSL_OP2_MOD) + return false; +- if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) ++ if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) + return false; + if (type->base_type != HLSL_TYPE_FLOAT) + return false; +- btype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_BOOL, type->dimx, type->dimy); ++ btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); + + if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) + return false; + list_add_before(&instr->entry, &mul1->entry); + +- if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, instr->loc))) ++ if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, &instr->loc))) + return false; + list_add_before(&instr->entry, &neg1->entry); + +@@ -2115,7 +2412,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + ge->data_type = btype; + list_add_before(&instr->entry, &ge->entry); + +- if 
(!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, instr->loc))) ++ if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, &instr->loc))) + return false; + list_add_before(&instr->entry, &neg2->entry); + +@@ -2125,10 +2422,10 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + if (!(one = hlsl_new_constant(ctx, type, &instr->loc))) + return false; + for (i = 0; i < type->dimx; ++i) +- one->value[i].f = 1.0f; ++ one->value.u[i].f = 1.0f; + list_add_before(&instr->entry, &one->node.entry); + +- if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &one->node, &cond->node))) ++ if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &one->node, cond))) + return false; + list_add_before(&instr->entry, &div->entry); + +@@ -2136,7 +2433,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + return false; + list_add_before(&instr->entry, &mul2->entry); + +- if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, instr->loc))) ++ if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, &instr->loc))) + return false; + list_add_before(&instr->entry, &frc->entry); + +@@ -2144,7 +2441,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + hlsl_src_remove(&expr->operands[0]); + hlsl_src_remove(&expr->operands[1]); + hlsl_src_from_node(&expr->operands[0], frc); +- hlsl_src_from_node(&expr->operands[1], &cond->node); ++ hlsl_src_from_node(&expr->operands[1], cond); + + return true; + } +@@ -2155,6 +2452,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + case HLSL_IR_CONSTANT: + case HLSL_IR_EXPR: ++ case HLSL_IR_INDEX: + case HLSL_IR_LOAD: + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_SWIZZLE: +@@ -2204,8 +2502,8 @@ static unsigned int index_instructions(struct hlsl_block *block, unsigned int in + if (instr->type == HLSL_IR_IF) + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); +- index = index_instructions(&iff->then_instrs, index); +- index = index_instructions(&iff->else_instrs, index); ++ index = index_instructions(&iff->then_block, index); ++ index = index_instructions(&iff->else_block, index); + } + else if (instr->type == HLSL_IR_LOOP) + { +@@ -2262,9 +2560,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + continue; + regset = hlsl_type_get_regset(var->data_type); + +- if (var->reg_reservation.type) ++ if (var->reg_reservation.reg_type) + { +- if (var->reg_reservation.type != get_regset_name(regset)) ++ if (var->reg_reservation.reg_type != get_regset_name(regset)) + { + struct vkd3d_string_buffer *type_string; + +@@ -2277,8 +2575,11 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + else + { + var->regs[regset].allocated = true; +- var->regs[regset].id = var->reg_reservation.index; +- TRACE("Allocated reserved %s to %c%u.\n", var->name, var->reg_reservation.type, var->reg_reservation.index); ++ var->regs[regset].id = var->reg_reservation.reg_index; ++ var->regs[regset].bind_count = var->data_type->reg_size[regset]; ++ TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, ++ var->reg_reservation.reg_index, var->reg_reservation.reg_type, ++ var->reg_reservation.reg_index + var->regs[regset].bind_count); + } + } + } +@@ -2286,9 +2587,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + + /* Compute the earliest and latest liveness for each variable. 
In the case that + * a variable is accessed inside of a loop, we promote its liveness to extend +- * to at least the range of the entire loop. Note that we don't need to do this +- * for anonymous nodes, since there's currently no way to use a node which was +- * calculated in an earlier iteration of the loop. */ ++ * to at least the range of the entire loop. We also do this for nodes, so that ++ * nodes produced before the loop have their temp register protected from being ++ * overridden after the last read within an iteration. */ + static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop_first, unsigned int loop_last) + { + struct hlsl_ir_node *instr; +@@ -2296,7 +2597,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { +- const unsigned int var_last_read = loop_last ? max(instr->index, loop_last) : instr->index; ++ const unsigned int last_read = loop_last ? max(instr->index, loop_last) : instr->index; + + switch (instr->type) + { +@@ -2311,9 +2612,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + var = store->lhs.var; + if (!var->first_write) + var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; +- store->rhs.node->last_read = instr->index; ++ store->rhs.node->last_read = last_read; + if (store->lhs.offset.node) +- store->lhs.offset.node->last_read = instr->index; ++ store->lhs.offset.node->last_read = last_read; + break; + } + case HLSL_IR_EXPR: +@@ -2322,16 +2623,16 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(expr->operands) && expr->operands[i].node; ++i) +- expr->operands[i].node->last_read = instr->index; ++ expr->operands[i].node->last_read = last_read; + break; + } + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + +- compute_liveness_recurse(&iff->then_instrs, loop_first, loop_last); +- compute_liveness_recurse(&iff->else_instrs, loop_first, loop_last); +- iff->condition.node->last_read = instr->index; ++ compute_liveness_recurse(&iff->then_block, loop_first, loop_last); ++ compute_liveness_recurse(&iff->else_block, loop_first, loop_last); ++ iff->condition.node->last_read = last_read; + break; + } + case HLSL_IR_LOAD: +@@ -2339,9 +2640,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + struct hlsl_ir_load *load = hlsl_ir_load(instr); + + var = load->src.var; +- var->last_read = max(var->last_read, var_last_read); ++ var->last_read = max(var->last_read, last_read); + if (load->src.offset.node) +- load->src.offset.node->last_read = instr->index; ++ load->src.offset.node->last_read = last_read; + break; + } + case HLSL_IR_LOOP: +@@ -2357,22 +2658,28 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + + var = load->resource.var; +- var->last_read = max(var->last_read, var_last_read); ++ var->last_read = max(var->last_read, last_read); + if (load->resource.offset.node) +- load->resource.offset.node->last_read = instr->index; ++ load->resource.offset.node->last_read = last_read; + + if ((var = load->sampler.var)) + { +- var->last_read = max(var->last_read, var_last_read); ++ var->last_read = max(var->last_read, last_read); + if (load->sampler.offset.node) +- load->sampler.offset.node->last_read = instr->index; ++ 
load->sampler.offset.node->last_read = last_read; + } + +- load->coords.node->last_read = instr->index; ++ load->coords.node->last_read = last_read; + if (load->texel_offset.node) +- load->texel_offset.node->last_read = instr->index; ++ load->texel_offset.node->last_read = last_read; + if (load->lod.node) +- load->lod.node->last_read = instr->index; ++ load->lod.node->last_read = last_read; ++ if (load->ddx.node) ++ load->ddx.node->last_read = last_read; ++ if (load->ddy.node) ++ load->ddy.node->last_read = last_read; ++ if (load->sample_index.node) ++ load->sample_index.node->last_read = last_read; + break; + } + case HLSL_IR_RESOURCE_STORE: +@@ -2380,18 +2687,26 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); + + var = store->resource.var; +- var->last_read = max(var->last_read, var_last_read); ++ var->last_read = max(var->last_read, last_read); + if (store->resource.offset.node) +- store->resource.offset.node->last_read = instr->index; +- store->coords.node->last_read = instr->index; +- store->value.node->last_read = instr->index; ++ store->resource.offset.node->last_read = last_read; ++ store->coords.node->last_read = last_read; ++ store->value.node->last_read = last_read; + break; + } + case HLSL_IR_SWIZZLE: + { + struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); + +- swizzle->val.node->last_read = instr->index; ++ swizzle->val.node->last_read = last_read; ++ break; ++ } ++ case HLSL_IR_INDEX: ++ { ++ struct hlsl_ir_index *index = hlsl_ir_index(instr); ++ ++ index->val.node->last_read = last_read; ++ index->idx.node->last_read = last_read; + break; + } + case HLSL_IR_CONSTANT: +@@ -2426,127 +2741,142 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + compute_liveness_recurse(&entry_func->body, 0, 0); + } + +-struct liveness ++struct register_allocator + { +- size_t size; +- uint32_t reg_count; +- struct ++ size_t count, capacity; ++ ++ /* Highest register index that has been allocated. ++ * Used to declare sm4 temp count. */ ++ uint32_t max_reg; ++ ++ struct allocation + { +- /* 0 if not live yet. */ +- unsigned int last_read; +- } *regs; ++ uint32_t reg; ++ unsigned int writemask; ++ unsigned int first_write, last_read; ++ } *allocations; + }; + +-static unsigned int get_available_writemask(struct liveness *liveness, +- unsigned int first_write, unsigned int component_idx, unsigned int reg_size) ++static unsigned int get_available_writemask(const struct register_allocator *allocator, ++ unsigned int first_write, unsigned int last_read, uint32_t reg_idx) + { +- unsigned int i, writemask = 0, count = 0; ++ unsigned int writemask = VKD3DSP_WRITEMASK_ALL; ++ size_t i; + +- for (i = 0; i < 4; ++i) ++ for (i = 0; i < allocator->count; ++i) + { +- if (liveness->regs[component_idx + i].last_read <= first_write) +- { +- writemask |= 1u << i; +- if (++count == reg_size) +- return writemask; +- } ++ const struct allocation *allocation = &allocator->allocations[i]; ++ ++ /* We do not overlap if first write == last read: ++ * this is the case where we are allocating the result of that ++ * expression, e.g. "add r0, r0, r1". 
*/ ++ ++ if (allocation->reg == reg_idx ++ && first_write < allocation->last_read && last_read > allocation->first_write) ++ writemask &= ~allocation->writemask; ++ ++ if (!writemask) ++ break; + } + +- return 0; ++ return writemask; + } + +-static bool resize_liveness(struct hlsl_ctx *ctx, struct liveness *liveness, size_t new_count) ++static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, ++ uint32_t reg_idx, unsigned int writemask, unsigned int first_write, unsigned int last_read) + { +- size_t old_capacity = liveness->size; ++ struct allocation *allocation; + +- if (!hlsl_array_reserve(ctx, (void **)&liveness->regs, &liveness->size, new_count, sizeof(*liveness->regs))) +- return false; ++ if (!hlsl_array_reserve(ctx, (void **)&allocator->allocations, &allocator->capacity, ++ allocator->count + 1, sizeof(*allocator->allocations))) ++ return; + +- if (liveness->size > old_capacity) +- memset(liveness->regs + old_capacity, 0, (liveness->size - old_capacity) * sizeof(*liveness->regs)); +- return true; ++ allocation = &allocator->allocations[allocator->count++]; ++ allocation->reg = reg_idx; ++ allocation->writemask = writemask; ++ allocation->first_write = first_write; ++ allocation->last_read = last_read; ++ ++ allocator->max_reg = max(allocator->max_reg, reg_idx); + } + + /* reg_size is the number of register components to be reserved, while component_count is the number + * of components for the register's writemask. In SM1, floats and vectors allocate the whole + * register, even if they don't use it completely. */ +-static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct liveness *liveness, ++static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, unsigned int reg_size, + unsigned int component_count) + { +- unsigned int component_idx, writemask, i; + struct hlsl_reg ret = {0}; ++ unsigned int writemask; ++ uint32_t reg_idx; + + assert(component_count <= reg_size); + +- for (component_idx = 0; component_idx < liveness->size; component_idx += 4) ++ for (reg_idx = 0;; ++reg_idx) + { +- if ((writemask = get_available_writemask(liveness, first_write, component_idx, reg_size))) ++ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx); ++ ++ if (vkd3d_popcount(writemask) >= reg_size) ++ { ++ writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1); + break; ++ } + } +- if (component_idx == liveness->size) +- { +- if (!resize_liveness(ctx, liveness, component_idx + 4)) +- return ret; +- writemask = (1u << reg_size) - 1; +- } +- for (i = 0; i < 4; ++i) +- { +- if (writemask & (1u << i)) +- liveness->regs[component_idx + i].last_read = last_read; +- } +- ret.id = component_idx / 4; ++ ++ record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); ++ ++ ret.id = reg_idx; ++ ret.bind_count = 1; + ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); + ret.allocated = true; +- liveness->reg_count = max(liveness->reg_count, ret.id + 1); + return ret; + } + +-static bool is_range_available(struct liveness *liveness, unsigned int first_write, +- unsigned int component_idx, unsigned int reg_size) ++static bool is_range_available(const struct register_allocator *allocator, ++ unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) + { +- unsigned int i; ++ uint32_t i; + +- for (i = 0; i < reg_size; i += 4) ++ for (i = 0; i < (reg_size / 4); 
++i) + { +- if (!get_available_writemask(liveness, first_write, component_idx + i, 4)) ++ if (get_available_writemask(allocator, first_write, last_read, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) + return false; + } + return true; + } + +-static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct liveness *liveness, ++static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, unsigned int reg_size) + { +- unsigned int i, component_idx; + struct hlsl_reg ret = {0}; ++ uint32_t reg_idx; ++ unsigned int i; + +- for (component_idx = 0; component_idx < liveness->size; component_idx += 4) ++ for (reg_idx = 0;; ++reg_idx) + { +- if (is_range_available(liveness, first_write, component_idx, +- min(reg_size, liveness->size - component_idx))) ++ if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size)) + break; + } +- if (!resize_liveness(ctx, liveness, component_idx + reg_size)) +- return ret; + +- for (i = 0; i < reg_size; ++i) +- liveness->regs[component_idx + i].last_read = last_read; +- ret.id = component_idx / 4; ++ for (i = 0; i < reg_size / 4; ++i) ++ record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); ++ ++ ret.id = reg_idx; ++ ret.bind_count = align(reg_size, 4) / 4; + ret.allocated = true; +- liveness->reg_count = max(liveness->reg_count, ret.id + align(reg_size, 4)); + return ret; + } + +-static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct liveness *liveness, ++static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, const struct hlsl_type *type) + { + unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; + +- if (type->type <= HLSL_CLASS_VECTOR) +- return allocate_register(ctx, liveness, first_write, last_read, reg_size, type->dimx); ++ if (type->class <= HLSL_CLASS_VECTOR) ++ return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx); + else +- return allocate_range(ctx, liveness, first_write, last_read, reg_size); ++ return allocate_range(ctx, allocator, first_write, last_read, reg_size); + } + + static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) +@@ -2565,14 +2895,99 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct + return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); + } + +-static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct liveness *liveness) ++static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_resource_load *load; ++ struct hlsl_ir_var *var; ++ enum hlsl_regset regset; ++ unsigned int index; ++ ++ if (instr->type != HLSL_IR_RESOURCE_LOAD) ++ return false; ++ ++ load = hlsl_ir_resource_load(instr); ++ var = load->resource.var; ++ regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); ++ ++ if (regset == HLSL_REGSET_SAMPLERS) ++ { ++ enum hlsl_sampler_dim dim; ++ ++ assert(!load->sampler.var); ++ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) ++ return false; ++ ++ dim = var->objects_usage[regset][index].sampler_dim; ++ if (dim != load->sampling_dim) ++ { ++ if (dim == HLSL_SAMPLER_DIM_GENERIC) ++ { ++ var->objects_usage[regset][index].first_sampler_dim_loc = 
instr->loc; ++ } ++ else ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, ++ "Inconsistent generic sampler usage dimension."); ++ hlsl_note(ctx, &var->objects_usage[regset][index].first_sampler_dim_loc, ++ VKD3D_SHADER_LOG_ERROR, "First use is here."); ++ return false; ++ } ++ } ++ var->objects_usage[regset][index].used = true; ++ var->objects_usage[regset][index].sampler_dim = load->sampling_dim; ++ } ++ else ++ { ++ if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) ++ return false; ++ ++ var->objects_usage[regset][index].used = true; ++ var->objects_usage[regset][index].sampler_dim = load->sampling_dim; ++ ++ if (load->sampler.var) ++ { ++ var = load->sampler.var; ++ if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) ++ return false; ++ ++ var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; ++ } ++ } ++ ++ return false; ++} ++ ++static void calculate_resource_register_counts(struct hlsl_ctx *ctx) ++{ ++ struct hlsl_ir_var *var; ++ struct hlsl_type *type; ++ unsigned int i, k; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ type = var->data_type; ++ ++ for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) ++ { ++ for (i = 0; i < type->reg_size[k]; ++i) ++ { ++ /* Samplers are only allocated until the last used one. */ ++ if (var->objects_usage[k][i].used) ++ var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; ++ } ++ } ++ } ++} ++ ++static void allocate_variable_temp_register(struct hlsl_ctx *ctx, ++ struct hlsl_ir_var *var, struct register_allocator *allocator) + { + if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) + return; + + if (!var->regs[HLSL_REGSET_NUMERIC].allocated && var->last_read) + { +- var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, liveness, ++ var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator, + var->first_write, var->last_read, var->data_type); + + TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r', +@@ -2580,7 +2995,8 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir + } + } + +-static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) ++static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, ++ struct hlsl_block *block, struct register_allocator *allocator) + { + struct hlsl_ir_node *instr; + +@@ -2588,7 +3004,7 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl + { + if (!instr->reg.allocated && instr->last_read) + { +- instr->reg = allocate_numeric_registers_for_type(ctx, liveness, instr->index, instr->last_read, ++ instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, + instr->data_type); + TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, + debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); +@@ -2599,8 +3015,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); +- allocate_temp_registers_recurse(ctx, &iff->then_instrs, liveness); +- allocate_temp_registers_recurse(ctx, &iff->else_instrs, liveness); ++ allocate_temp_registers_recurse(ctx, &iff->then_block, allocator); ++ allocate_temp_registers_recurse(ctx, &iff->else_block, allocator); + break; + 
} + +@@ -2609,21 +3025,21 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl + struct hlsl_ir_load *load = hlsl_ir_load(instr); + /* We need to at least allocate a variable for undefs. + * FIXME: We should probably find a way to remove them instead. */ +- allocate_variable_temp_register(ctx, load->src.var, liveness); ++ allocate_variable_temp_register(ctx, load->src.var, allocator); + break; + } + + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); +- allocate_temp_registers_recurse(ctx, &loop->body, liveness); ++ allocate_temp_registers_recurse(ctx, &loop->body, allocator); + break; + } + + case HLSL_IR_STORE: + { + struct hlsl_ir_store *store = hlsl_ir_store(instr); +- allocate_variable_temp_register(ctx, store->lhs.var, liveness); ++ allocate_variable_temp_register(ctx, store->lhs.var, allocator); + break; + } + +@@ -2633,7 +3049,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl + } + } + +-static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) ++static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, ++ struct hlsl_block *block, struct register_allocator *allocator) + { + struct hlsl_constant_defs *defs = &ctx->constant_defs; + struct hlsl_ir_node *instr; +@@ -2649,7 +3066,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b + unsigned int x, y, i, writemask, end_reg; + unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; + +- constant->reg = allocate_numeric_registers_for_type(ctx, liveness, 1, UINT_MAX, type); ++ constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); + TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); + + if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, +@@ -2662,7 +3079,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b + defs->count = end_reg; + } + +- assert(type->type <= HLSL_CLASS_LAST_NUMERIC); ++ assert(type->class <= HLSL_CLASS_LAST_NUMERIC); + + if (!(writemask = constant->reg.writemask)) + writemask = (1u << type->dimx) - 1; +@@ -2671,12 +3088,12 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b + { + for (x = 0, i = 0; x < 4; ++x) + { +- const union hlsl_constant_value *value; ++ const union hlsl_constant_value_component *value; + float f; + + if (!(writemask & (1u << x))) + continue; +- value = &constant->value[i++]; ++ value = &constant->value.u[i++]; + + switch (type->base_type) + { +@@ -2714,15 +3131,15 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b + case HLSL_IR_IF: + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); +- allocate_const_registers_recurse(ctx, &iff->then_instrs, liveness); +- allocate_const_registers_recurse(ctx, &iff->else_instrs, liveness); ++ allocate_const_registers_recurse(ctx, &iff->then_block, allocator); ++ allocate_const_registers_recurse(ctx, &iff->else_block, allocator); + break; + } + + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); +- allocate_const_registers_recurse(ctx, &loop->body, liveness); ++ allocate_const_registers_recurse(ctx, &loop->body, allocator); + break; + } + +@@ -2734,10 +3151,10 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b + + static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { +- 
struct liveness liveness = {0}; ++ struct register_allocator allocator = {0}; + struct hlsl_ir_var *var; + +- allocate_const_registers_recurse(ctx, &entry_func->body, &liveness); ++ allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +@@ -2748,12 +3165,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + if (reg_size == 0) + continue; + +- var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &liveness, ++ var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, + 1, UINT_MAX, var->data_type); + TRACE("Allocated %s to %s.\n", var->name, + debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); + } + } ++ ++ vkd3d_free(allocator.allocations); + } + + /* Simple greedy temporary register allocation pass that just assigns a unique +@@ -2762,15 +3181,33 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + * does not handle constants. */ + static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { +- struct liveness liveness = {0}; +- allocate_temp_registers_recurse(ctx, &entry_func->body, &liveness); +- ctx->temp_count = liveness.reg_count; +- vkd3d_free(liveness.regs); ++ struct register_allocator allocator = {0}; ++ ++ /* ps_1_* outputs are special and go in temp register 0. */ ++ if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ { ++ size_t i; ++ ++ for (i = 0; i < entry_func->parameters.count; ++i) ++ { ++ const struct hlsl_ir_var *var = entry_func->parameters.vars[i]; ++ ++ if (var->is_output_semantic) ++ { ++ record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); ++ break; ++ } ++ } ++ } ++ ++ allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); ++ ctx->temp_count = allocator.max_reg + 1; ++ vkd3d_free(allocator.allocations); + } + + static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) + { +- static const char *shader_names[] = ++ static const char *const shader_names[] = + { + [VKD3D_SHADER_TYPE_PIXEL] = "Pixel", + [VKD3D_SHADER_TYPE_VERTEX] = "Vertex", +@@ -2791,7 +3228,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + D3DDECLUSAGE usage; + uint32_t usage_idx; + +- if (!hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) ++ /* ps_1_* outputs are special and go in temp register 0. 
*/ ++ if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ return; ++ ++ builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, ®); ++ if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Invalid semantic '%s'.", var->semantic.name); +@@ -2800,8 +3242,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + + if ((!output && !var->last_read) || (output && !var->first_write)) + return; +- +- builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, ®); + } + else + { +@@ -2827,6 +3267,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + { + var->regs[HLSL_REGSET_NUMERIC].allocated = true; + var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; ++ var->regs[HLSL_REGSET_NUMERIC].bind_count = 1; + var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; + TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', + var->regs[HLSL_REGSET_NUMERIC], var->data_type)); +@@ -2853,23 +3294,117 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 + + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) + { +- if (buffer->used_size && buffer->reservation.type == 'b' && buffer->reservation.index == index) ++ if (buffer->used_size && buffer->reservation.reg_type == 'b' && buffer->reservation.reg_index == index) + return buffer; + } + return NULL; + } + +-static void calculate_buffer_offset(struct hlsl_ir_var *var) ++static void calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) + { ++ unsigned int var_reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ enum hlsl_type_class var_class = var->data_type->class; + struct hlsl_buffer *buffer = var->buffer; + +- buffer->size = hlsl_type_get_sm4_offset(var->data_type, buffer->size); ++ if (var->reg_reservation.offset_type == 'c') ++ { ++ if (var->reg_reservation.offset_index % 4) ++ { ++ if (var_class == HLSL_CLASS_MATRIX) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "packoffset() reservations with matrix types must be aligned with the beginning of a register."); ++ } ++ else if (var_class == HLSL_CLASS_ARRAY) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "packoffset() reservations with array types must be aligned with the beginning of a register."); ++ } ++ else if (var_class == HLSL_CLASS_STRUCT) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "packoffset() reservations with struct types must be aligned with the beginning of a register."); ++ } ++ else if (var_class == HLSL_CLASS_VECTOR) ++ { ++ unsigned int aligned_offset = hlsl_type_get_sm4_offset(var->data_type, var->reg_reservation.offset_index); ++ ++ if (var->reg_reservation.offset_index != aligned_offset) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "packoffset() reservations with vector types cannot span multiple registers."); ++ } ++ } ++ var->buffer_offset = var->reg_reservation.offset_index; ++ } ++ else ++ { ++ var->buffer_offset = hlsl_type_get_sm4_offset(var->data_type, buffer->size); ++ } + +- var->buffer_offset = buffer->size; + TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); +- buffer->size += var->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ 
buffer->size = max(buffer->size, var->buffer_offset + var_reg_size); + if (var->last_read) +- buffer->used_size = buffer->size; ++ buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size); ++} ++ ++static void validate_buffer_offsets(struct hlsl_ctx *ctx) ++{ ++ struct hlsl_ir_var *var1, *var2; ++ struct hlsl_buffer *buffer; ++ ++ LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) ++ continue; ++ ++ buffer = var1->buffer; ++ if (!buffer->used_size) ++ continue; ++ ++ LIST_FOR_EACH_ENTRY(var2, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ unsigned int var1_reg_size, var2_reg_size; ++ ++ if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) ++ continue; ++ ++ if (var1 == var2 || var1->buffer != var2->buffer) ++ continue; ++ ++ /* This is to avoid reporting the error twice for the same pair of overlapping variables. */ ++ if (strcmp(var1->name, var2->name) >= 0) ++ continue; ++ ++ var1_reg_size = var1->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ var2_reg_size = var2->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ ++ if (var1->buffer_offset < var2->buffer_offset + var2_reg_size ++ && var2->buffer_offset < var1->buffer_offset + var1_reg_size) ++ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid packoffset() reservation: Variables %s and %s overlap.", ++ var1->name, var2->name); ++ } ++ } ++ ++ LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ buffer = var1->buffer; ++ if (!buffer || buffer == ctx->globals_buffer) ++ continue; ++ ++ if (var1->reg_reservation.offset_type ++ || (var1->data_type->class == HLSL_CLASS_OBJECT && var1->reg_reservation.reg_type)) ++ buffer->manually_packed_elements = true; ++ else ++ buffer->automatically_packed_elements = true; ++ ++ if (buffer->manually_packed_elements && buffer->automatically_packed_elements) ++ { ++ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "packoffset() must be specified for all the buffer elements, or none of them."); ++ break; ++ } ++ } + } + + static void allocate_buffers(struct hlsl_ctx *ctx) +@@ -2880,15 +3415,17 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (var->is_uniform && var->data_type->type != HLSL_CLASS_OBJECT) ++ if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) + { + if (var->is_param) + var->buffer = ctx->params_buffer; + +- calculate_buffer_offset(var); ++ calculate_buffer_offset(ctx, var); + } + } + ++ validate_buffer_offsets(ctx); ++ + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!buffer->used_size) +@@ -2896,28 +3433,30 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + + if (buffer->type == HLSL_BUFFER_CONSTANT) + { +- if (buffer->reservation.type == 'b') ++ if (buffer->reservation.reg_type == 'b') + { +- const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.index); ++ const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.reg_index); + + if (reserved_buffer && reserved_buffer != buffer) + { + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, +- "Multiple buffers bound to cb%u.", buffer->reservation.index); ++ "Multiple buffers bound to cb%u.", buffer->reservation.reg_index); + hlsl_note(ctx, 
&reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, +- "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.index); ++ "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.reg_index); + } + +- buffer->reg.id = buffer->reservation.index; ++ buffer->reg.id = buffer->reservation.reg_index; ++ buffer->reg.bind_count = 1; + buffer->reg.allocated = true; + TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); + } +- else if (!buffer->reservation.type) ++ else if (!buffer->reservation.reg_type) + { + while (get_reserved_buffer(ctx, index)) + ++index; + + buffer->reg.id = index; ++ buffer->reg.bind_count = 1; + buffer->reg.allocated = true; + TRACE("Allocated %s to cb%u.\n", buffer->name, index); + ++index; +@@ -2939,13 +3478,17 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum + uint32_t index) + { + const struct hlsl_ir_var *var; ++ unsigned int start, count; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) + { + if (!var->regs[regset].allocated) + continue; + +- if (index == var->regs[regset].id) ++ start = var->regs[regset].id; ++ count = var->regs[regset].bind_count; ++ ++ if (start <= index && index < start + count) + return var; + } + return NULL; +@@ -2956,7 +3499,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + char regset_name = get_regset_name(regset); + struct hlsl_ir_var *var; + uint32_t min_index = 0; +- uint32_t index; + + if (regset == HLSL_REGSET_UAVS) + { +@@ -2968,19 +3510,17 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + } + } + +- index = min_index; +- + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (!var->last_read || !var->data_type->reg_size[regset]) ++ unsigned int count = var->regs[regset].bind_count; ++ ++ if (count == 0) + continue; + + if (var->regs[regset].allocated) + { +- const struct hlsl_ir_var *reserved_object; +- unsigned int index = var->regs[regset].id; +- +- reserved_object = get_allocated_object(ctx, regset, index); ++ const struct hlsl_ir_var *reserved_object, *last_reported = NULL; ++ unsigned int index, i; + + if (var->regs[regset].id < min_index) + { +@@ -2988,28 +3528,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, + "UAV index (%u) must be higher than the maximum render target index (%u).", + var->regs[regset].id, min_index - 1); ++ continue; + } +- else if (reserved_object && reserved_object != var) ++ ++ for (i = 0; i < count; ++i) + { +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, +- "Multiple objects bound to %c%u.", regset_name, index); +- hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, +- "Object '%s' is already bound to %c%u.", reserved_object->name, +- regset_name, index); +- } ++ index = var->regs[regset].id + i; + +- var->regs[regset].id = var->reg_reservation.index; +- var->regs[regset].allocated = true; +- TRACE("Allocated reserved %s to %c%u.\n", var->name, regset_name, var->regs[regset].id); ++ reserved_object = get_allocated_object(ctx, regset, index); ++ if (reserved_object && reserved_object != var && reserved_object != last_reported) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, ++ "Multiple variables bound to %c%u.", regset_name, index); ++ hlsl_note(ctx, &reserved_object->loc, 
VKD3D_SHADER_LOG_ERROR, ++ "Variable '%s' is already bound to %c%u.", reserved_object->name, ++ regset_name, index); ++ last_reported = reserved_object; ++ } ++ } + } + else + { +- while (get_allocated_object(ctx, regset, index)) ++ unsigned int index = min_index; ++ unsigned int available = 0; ++ ++ while (available < count) ++ { ++ if (get_allocated_object(ctx, regset, index)) ++ available = 0; ++ else ++ ++available; + ++index; ++ } ++ index -= count; + + var->regs[regset].id = index; + var->regs[regset].allocated = true; +- TRACE("Allocated object to %c%u.\n", regset_name, index); ++ TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, ++ index + count); + ++index; + } + } +@@ -3034,12 +3590,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + return false; + + /* We should always have generated a cast to UINT. */ +- assert(path_node->data_type->type == HLSL_CLASS_SCALAR ++ assert(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->base_type == HLSL_TYPE_UINT); + +- idx = hlsl_ir_constant(path_node)->value[0].u; ++ idx = hlsl_ir_constant(path_node)->value.u[0].u; + +- switch (type->type) ++ switch (type->class) + { + case HLSL_CLASS_VECTOR: + if (idx >= type->dimx) +@@ -3090,6 +3646,55 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + return true; + } + ++bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, ++ enum hlsl_regset regset, unsigned int *index) ++{ ++ struct hlsl_type *type = deref->var->data_type; ++ unsigned int i; ++ ++ assert(regset <= HLSL_REGSET_LAST_OBJECT); ++ ++ *index = 0; ++ ++ for (i = 0; i < deref->path_len; ++i) ++ { ++ struct hlsl_ir_node *path_node = deref->path[i].node; ++ unsigned int idx = 0; ++ ++ assert(path_node); ++ if (path_node->type != HLSL_IR_CONSTANT) ++ return false; ++ ++ /* We should always have generated a cast to UINT. */ ++ assert(path_node->data_type->class == HLSL_CLASS_SCALAR ++ && path_node->data_type->base_type == HLSL_TYPE_UINT); ++ ++ idx = hlsl_ir_constant(path_node)->value.u[0].u; ++ ++ switch (type->class) ++ { ++ case HLSL_CLASS_ARRAY: ++ if (idx >= type->e.array.elements_count) ++ return false; ++ ++ *index += idx * type->e.array.type->reg_size[regset]; ++ break; ++ ++ case HLSL_CLASS_STRUCT: ++ *index += type->e.record.fields[idx].reg_offset[regset]; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ type = hlsl_get_element_type_from_path_index(ctx, type, path_node); ++ } ++ ++ assert(type->reg_size[regset] == 1); ++ return true; ++} ++ + bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) + { + struct hlsl_ir_node *offset_node = deref->offset.node; +@@ -3102,13 +3707,13 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref + } + + /* We should always have generated a cast to UINT. 
*/ +- assert(offset_node->data_type->type == HLSL_CLASS_SCALAR ++ assert(offset_node->data_type->class == HLSL_CLASS_SCALAR + && offset_node->data_type->base_type == HLSL_TYPE_UINT); + + if (offset_node->type != HLSL_IR_CONSTANT) + return false; + +- *offset = hlsl_ir_constant(offset_node)->value[0].u; ++ *offset = hlsl_ir_constant(offset_node)->value.u[0].u; + + size = deref->var->data_type->reg_size[deref->offset_regset]; + if (*offset >= size) +@@ -3170,7 +3775,7 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a + const struct hlsl_type *type = instr->data_type; + const struct hlsl_ir_constant *constant; + +- if (type->type != HLSL_CLASS_SCALAR ++ if (type->class != HLSL_CLASS_SCALAR + || (type->base_type != HLSL_TYPE_INT && type->base_type != HLSL_TYPE_UINT)) + { + struct vkd3d_string_buffer *string; +@@ -3190,13 +3795,32 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a + } + constant = hlsl_ir_constant(instr); + +- if ((type->base_type == HLSL_TYPE_INT && constant->value[0].i <= 0) +- || (type->base_type == HLSL_TYPE_UINT && !constant->value[0].u)) ++ if ((type->base_type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) ++ || (type->base_type == HLSL_TYPE_UINT && !constant->value.u[0].u)) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, + "Thread count must be a positive integer."); + +- ctx->thread_count[i] = constant->value[0].u; ++ ctx->thread_count[i] = constant->value.u[0].u; ++ } ++} ++ ++static bool type_has_object_components(struct hlsl_type *type) ++{ ++ if (type->class == HLSL_CLASS_OBJECT) ++ return true; ++ if (type->class == HLSL_CLASS_ARRAY) ++ return type_has_object_components(type->e.array.type); ++ if (type->class == HLSL_CLASS_STRUCT) ++ { ++ unsigned int i; ++ ++ for (i = 0; i < type->e.record.field_count; ++i) ++ { ++ if (type_has_object_components(type->e.record.fields[i].type)) ++ return true; ++ } + } ++ return false; + } + + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, +@@ -3212,7 +3836,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + list_move_head(&body->instrs, &ctx->static_initializers); + + memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); +- transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); ++ hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); + vkd3d_free(recursive_call_ctx.backtrace); + + /* Avoid going into an infinite loop when processing call instructions. 
+@@ -3222,7 +3846,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + + lower_return(ctx, entry_func, body, false); + +- while (transform_ir(ctx, lower_calls, body, NULL)); ++ while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); ++ ++ hlsl_transform_ir(ctx, lower_index_loads, body, NULL); + + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + { +@@ -3234,15 +3860,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + { + var = entry_func->parameters.vars[i]; + +- if (var->data_type->type == HLSL_CLASS_OBJECT || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + { + prepend_uniform_copy(ctx, &body->instrs, var); + } + else + { +- if (var->data_type->type != HLSL_CLASS_STRUCT && !var->semantic.name) ++ if (type_has_object_components(var->data_type)) ++ hlsl_fixme(ctx, &var->loc, "Prepend uniform copies for object components within structs."); ++ ++ if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT ++ && !var->semantic.name) ++ { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Parameter \"%s\" is missing a semantic.", var->name); ++ var->semantic.reported_missing = true; ++ } + + if (var->storage_modifiers & HLSL_STORAGE_IN) + prepend_input_var_copy(ctx, &body->instrs, var); +@@ -3252,7 +3885,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + } + if (entry_func->return_var) + { +- if (entry_func->return_var->data_type->type != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) ++ if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); + +@@ -3274,53 +3907,55 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); + +- transform_ir(ctx, lower_broadcasts, body, NULL); +- while (transform_ir(ctx, fold_redundant_casts, body, NULL)); ++ hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); ++ while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); + do + { +- progress = transform_ir(ctx, split_array_copies, body, NULL); +- progress |= transform_ir(ctx, split_struct_copies, body, NULL); ++ progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); ++ progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); + } + while (progress); +- transform_ir(ctx, split_matrix_copies, body, NULL); +- +- transform_ir(ctx, lower_narrowing_casts, body, NULL); +- transform_ir(ctx, lower_casts_to_bool, body, NULL); +- transform_ir(ctx, lower_int_division, body, NULL); +- transform_ir(ctx, lower_int_modulus, body, NULL); +- transform_ir(ctx, lower_int_abs, body, NULL); +- transform_ir(ctx, lower_float_modulus, body, NULL); ++ hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); ++ ++ hlsl_transform_ir(ctx, lower_narrowing_casts, body, NULL); ++ hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); ++ hlsl_transform_ir(ctx, lower_int_division, body, NULL); ++ hlsl_transform_ir(ctx, lower_int_modulus, body, NULL); ++ hlsl_transform_ir(ctx, lower_int_abs, body, NULL); ++ 
hlsl_transform_ir(ctx, lower_float_modulus, body, NULL); + do + { +- progress = transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); +- progress |= transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); + progress |= copy_propagation_execute(ctx, body); +- progress |= transform_ir(ctx, fold_swizzle_chains, body, NULL); +- progress |= transform_ir(ctx, remove_trivial_swizzles, body, NULL); ++ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); ++ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); + } + while (progress); + + if (profile->major_version < 4) + { +- transform_ir(ctx, lower_division, body, NULL); +- transform_ir(ctx, lower_sqrt, body, NULL); +- transform_ir(ctx, lower_dot, body, NULL); ++ hlsl_transform_ir(ctx, lower_division, body, NULL); ++ hlsl_transform_ir(ctx, lower_sqrt, body, NULL); ++ hlsl_transform_ir(ctx, lower_dot, body, NULL); ++ hlsl_transform_ir(ctx, lower_round, body, NULL); + } + + if (profile->major_version < 2) + { +- transform_ir(ctx, lower_abs, body, NULL); ++ hlsl_transform_ir(ctx, lower_abs, body, NULL); + } + +- transform_ir(ctx, validate_static_object_references, body, NULL); ++ hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); ++ hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); + + /* TODO: move forward, remove when no longer needed */ +- transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); +- while (transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); ++ hlsl_transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); ++ while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); + + do + compute_liveness(ctx, entry_func); +- while (transform_ir(ctx, dce, body, NULL)); ++ while (hlsl_transform_ir(ctx, dce, body, NULL)); + + compute_liveness(ctx, entry_func); + +@@ -3328,6 +3963,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + rb_for_each_entry(&ctx->functions, dump_function, ctx); + + allocate_register_reservations(ctx); ++ ++ calculate_resource_register_counts(ctx); ++ + allocate_temp_registers(ctx, entry_func); + if (profile->major_version < 4) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +index 3210bbd5712..9fa2acd5d5e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +@@ -22,7 +22,49 @@ + + #include "hlsl.h" + +-static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) ++static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, ++ const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[k].f = fabsf(src->value.u[k].f); ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ dst->u[k].d = fabs(src->value.u[k].d); ++ break; ++ ++ case HLSL_TYPE_INT: ++ /* C's abs(INT_MIN) is undefined, but HLSL evaluates this to INT_MIN */ ++ if (src->value.u[k].i == INT_MIN) ++ dst->u[k].i = INT_MIN; ++ else ++ dst->u[k].i = abs(src->value.u[k].i); ++ break; ++ ++ case HLSL_TYPE_UINT: ++ dst->u[k].u = 
src->value.u[k].u; ++ break; ++ ++ default: ++ FIXME("Fold abs() for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, ++ const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { + unsigned int k; + uint32_t u; +@@ -30,11 +72,11 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct + double d; + float f; + +- if (dst->node.data_type->dimx != src->node.data_type->dimx +- || dst->node.data_type->dimy != src->node.data_type->dimy) ++ if (dst_type->dimx != src->node.data_type->dimx ++ || dst_type->dimy != src->node.data_type->dimy) + { + FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type), +- debug_hlsl_type(ctx, dst->node.data_type)); ++ debug_hlsl_type(ctx, dst_type)); + return false; + } + +@@ -44,61 +86,61 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +- u = src->value[k].f; +- i = src->value[k].f; +- f = src->value[k].f; +- d = src->value[k].f; ++ u = src->value.u[k].f; ++ i = src->value.u[k].f; ++ f = src->value.u[k].f; ++ d = src->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: +- u = src->value[k].d; +- i = src->value[k].d; +- f = src->value[k].d; +- d = src->value[k].d; ++ u = src->value.u[k].d; ++ i = src->value.u[k].d; ++ f = src->value.u[k].d; ++ d = src->value.u[k].d; + break; + + case HLSL_TYPE_INT: +- u = src->value[k].i; +- i = src->value[k].i; +- f = src->value[k].i; +- d = src->value[k].i; ++ u = src->value.u[k].i; ++ i = src->value.u[k].i; ++ f = src->value.u[k].i; ++ d = src->value.u[k].i; + break; + + case HLSL_TYPE_UINT: +- u = src->value[k].u; +- i = src->value[k].u; +- f = src->value[k].u; +- d = src->value[k].u; ++ u = src->value.u[k].u; ++ i = src->value.u[k].u; ++ f = src->value.u[k].u; ++ d = src->value.u[k].u; + break; + + case HLSL_TYPE_BOOL: +- u = !!src->value[k].u; +- i = !!src->value[k].u; +- f = !!src->value[k].u; +- d = !!src->value[k].u; ++ u = !!src->value.u[k].u; ++ i = !!src->value.u[k].u; ++ f = !!src->value.u[k].u; ++ d = !!src->value.u[k].u; + break; + + default: + vkd3d_unreachable(); + } + +- switch (dst->node.data_type->base_type) ++ switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +- dst->value[k].f = f; ++ dst->u[k].f = f; + break; + + case HLSL_TYPE_DOUBLE: +- dst->value[k].d = d; ++ dst->u[k].d = d; + break; + + case HLSL_TYPE_INT: +- dst->value[k].i = i; ++ dst->u[k].i = i; + break; + + case HLSL_TYPE_UINT: +- dst->value[k].u = u; ++ dst->u[k].u = u; + break; + + case HLSL_TYPE_BOOL: +@@ -110,9 +152,10 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct + return true; + } + +-static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) ++static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, ++ const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { +- enum hlsl_base_type type = dst->node.data_type->base_type; ++ enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src->node.data_type->base_type); +@@ -123,30 +166,30 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +- dst->value[k].f = -src->value[k].f; ++ dst->u[k].f = -src->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: +- dst->value[k].d = 
-src->value[k].d; ++ dst->u[k].d = -src->value.u[k].d; + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- dst->value[k].u = -src->value[k].u; ++ dst->u[k].u = -src->value.u[k].u; + break; + + default: +- FIXME("Fold negation for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); ++ FIXME("Fold negation for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; + } + +-static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src1, +- struct hlsl_ir_constant *src2) ++static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst->node.data_type->base_type; ++ enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); +@@ -158,32 +201,32 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +- dst->value[k].f = src1->value[k].f + src2->value[k].f; ++ dst->u[k].f = src1->value.u[k].f + src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: +- dst->value[k].d = src1->value[k].d + src2->value[k].d; ++ dst->u[k].d = src1->value.u[k].d + src2->value.u[k].d; + break; + + /* Handling HLSL_TYPE_INT through the unsigned field to avoid + * undefined behavior with signed integers in C. */ + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- dst->value[k].u = src1->value[k].u + src2->value[k].u; ++ dst->u[k].u = src1->value.u[k].u + src2->value.u[k].u; + break; + + default: +- FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); ++ FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; + } + +-static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, +- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) ++static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +- enum hlsl_base_type type = dst->node.data_type->base_type; ++ enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); +@@ -195,32 +238,32 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +- dst->value[k].f = src1->value[k].f * src2->value[k].f; ++ dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: +- dst->value[k].d = src1->value[k].d * src2->value[k].d; ++ dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- dst->value[k].u = src1->value[k].u * src2->value[k].u; ++ dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; + break; + + default: +- FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); ++ FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; + } + +-static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, +- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) ++static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { + 
unsigned int k; + +- assert(dst->node.data_type->base_type == HLSL_TYPE_BOOL); ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + + for (k = 0; k < 4; ++k) +@@ -229,192 +272,192 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +- dst->value[k].u = src1->value[k].f != src2->value[k].f; ++ dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: +- dst->value[k].u = src1->value[k].d != src2->value[k].d; ++ dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: +- dst->value[k].u = src1->value[k].u != src2->value[k].u; ++ dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; + break; + + default: + vkd3d_unreachable(); + } + +- dst->value[k].u *= ~0u; ++ dst->u[k].u *= ~0u; + } + return true; + } + +-static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, +- struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) ++static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, ++ const struct vkd3d_shader_location *loc) + { +- enum hlsl_base_type type = dst->node.data_type->base_type; ++ enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + +- for (k = 0; k < dst->node.data_type->dimx; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: +- if (ctx->profile->major_version >= 4 && src2->value[k].f == 0) ++ if (ctx->profile->major_version >= 4 && src2->value.u[k].f == 0) + { +- hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + "Floating point division by zero."); + } +- dst->value[k].f = src1->value[k].f / src2->value[k].f; +- if (ctx->profile->major_version < 4 && !isfinite(dst->value[k].f)) ++ dst->u[k].f = src1->value.u[k].f / src2->value.u[k].f; ++ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) + { +- hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + "Infinities and NaNs are not allowed by the shader model."); + } + break; + + case HLSL_TYPE_DOUBLE: +- if (src2->value[k].d == 0) ++ if (src2->value.u[k].d == 0) + { +- hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + "Floating point division by zero."); + } +- dst->value[k].d = src1->value[k].d / src2->value[k].d; ++ dst->u[k].d = src1->value.u[k].d / src2->value.u[k].d; + break; + + case HLSL_TYPE_INT: +- if (src2->value[k].i == 0) ++ if (src2->value.u[k].i == 0) + { +- hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + "Division by zero."); + return false; + } +- if (src1->value[k].i == INT_MIN && src2->value[k].i == -1) +- dst->value[k].i = INT_MIN; ++ if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) ++ dst->u[k].i = INT_MIN; + else +- dst->value[k].i = src1->value[k].i / src2->value[k].i; ++ dst->u[k].i = src1->value.u[k].i / 
src2->value.u[k].i;
+                 break;
+ 
+             case HLSL_TYPE_UINT:
+-                if (src2->value[k].u == 0)
++                if (src2->value.u[k].u == 0)
+                 {
+-                    hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO,
++                    hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO,
+                             "Division by zero.");
+                     return false;
+                 }
+-                dst->value[k].u = src1->value[k].u / src2->value[k].u;
++                dst->u[k].u = src1->value.u[k].u / src2->value.u[k].u;
+                 break;
+ 
+             default:
+-                FIXME("Fold division for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type));
++                FIXME("Fold division for type %s.\n", debug_hlsl_type(ctx, dst_type));
+                 return false;
+         }
+     }
+     return true;
+ }
+ 
+-static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
+-        struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
++static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type,
++        const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2,
++        const struct vkd3d_shader_location *loc)
+ {
+-    enum hlsl_base_type type = dst->node.data_type->base_type;
++    enum hlsl_base_type type = dst_type->base_type;
+     unsigned int k;
+ 
+     assert(type == src1->node.data_type->base_type);
+     assert(type == src2->node.data_type->base_type);
+ 
+-    for (k = 0; k < dst->node.data_type->dimx; ++k)
++    for (k = 0; k < dst_type->dimx; ++k)
+     {
+         switch (type)
+         {
+             case HLSL_TYPE_INT:
+-                if (src2->value[k].i == 0)
++                if (src2->value.u[k].i == 0)
+                 {
+-                    hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO,
+-                            "Division by zero.");
++                    hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero.");
+                     return false;
+                 }
+-                if (src1->value[k].i == INT_MIN && src2->value[k].i == -1)
+-                    dst->value[k].i = 0;
++                if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1)
++                    dst->u[k].i = 0;
+                 else
+-                    dst->value[k].i = src1->value[k].i % src2->value[k].i;
++                    dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i;
+                 break;
+ 
+             case HLSL_TYPE_UINT:
+-                if (src2->value[k].u == 0)
++                if (src2->value.u[k].u == 0)
+                 {
+-                    hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO,
+-                            "Division by zero.");
++                    hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero.");
+                     return false;
+                 }
+-                dst->value[k].u = src1->value[k].u % src2->value[k].u;
++                dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u;
+                 break;
+ 
+             default:
+-                FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type));
++                FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type));
+                 return false;
+         }
+     }
+     return true;
+ }
+ 
+-static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
+-        struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
++static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type,
++        const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2)
+ {
+-    enum hlsl_base_type type = dst->node.data_type->base_type;
++    enum hlsl_base_type type = dst_type->base_type;
+     unsigned int k;
+ 
+     assert(type == src1->node.data_type->base_type);
+     assert(type == src2->node.data_type->base_type);
+ 
+-    for (k = 0; k < dst->node.data_type->dimx; ++k)
++    for (k = 0; k < dst_type->dimx; ++k)
+     {
+         switch (type)
+         {
+             case HLSL_TYPE_INT:
+-                dst->value[k].i = max(src1->value[k].i, src2->value[k].i);
++                dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i);
+                 break;
+ 
+             case HLSL_TYPE_UINT:
+-                dst->value[k].u = max(src1->value[k].u, src2->value[k].u);
++                dst->u[k].u = max(src1->value.u[k].u, src2->value.u[k].u);
+                 break;
+ 
+             default:
+-                FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type));
++                FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst_type));
+                 return false;
+         }
+     }
+     return true;
+ }
+ 
+-static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
+-        struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
++static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type,
++        const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2)
+ {
+-    enum hlsl_base_type type = dst->node.data_type->base_type;
++    enum hlsl_base_type type = dst_type->base_type;
+     unsigned int k;
+ 
+     assert(type == src1->node.data_type->base_type);
+     assert(type == src2->node.data_type->base_type);
+ 
+-    for (k = 0; k < dst->node.data_type->dimx; ++k)
++    for (k = 0; k < dst_type->dimx; ++k)
+     {
+         switch (type)
+         {
+             case HLSL_TYPE_INT:
+-                dst->value[k].i = min(src1->value[k].i, src2->value[k].i);
++                dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i);
+                 break;
+ 
+             case HLSL_TYPE_UINT:
+-                dst->value[k].u = min(src1->value[k].u, src2->value[k].u);
++                dst->u[k].u = min(src1->value.u[k].u, src2->value.u[k].u);
+                 break;
+ 
+             default:
+-                FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type));
++                FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst_type));
+                 return false;
+         }
+     }
+@@ -436,7 +479,7 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
+         {
+             case HLSL_TYPE_INT:
+             case HLSL_TYPE_UINT:
+-                dst->value[k].u = src1->value[k].u ^ src2->value[k].u;
++                dst->value.u[k].u = src1->value.u[k].u ^ src2->value.u[k].u;
+                 break;
+ 
+             default:
+@@ -462,7 +505,7 @@ static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
+         {
+             case HLSL_TYPE_INT:
+             case HLSL_TYPE_UINT:
+-                dst->value[k].u = src1->value[k].u & src2->value[k].u;
++                dst->value.u[k].u = src1->value.u[k].u & src2->value.u[k].u;
+                 break;
+ 
+             default:
+@@ -488,7 +531,7 @@ static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
+         {
+             case HLSL_TYPE_INT:
+             case HLSL_TYPE_UINT:
+-                dst->value[k].u = src1->value[k].u | src2->value[k].u;
++                dst->value.u[k].u = src1->value.u[k].u | src2->value.u[k].u;
+                 break;
+ 
+             default:
+@@ -512,7 +555,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
+     if (!expr->operands[0].node)
+         return false;
+ 
+-    if (instr->data_type->type > HLSL_CLASS_VECTOR)
++    if (instr->data_type->class > HLSL_CLASS_VECTOR)
+         return false;
+ 
+     for (i = 0; i < ARRAY_SIZE(expr->operands); ++i)
+@@ -521,7 +564,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
+         {
+             if (expr->operands[i].node->type != HLSL_IR_CONSTANT)
+                 return false;
+-            assert(expr->operands[i].node->data_type->type <= HLSL_CLASS_VECTOR);
++            assert(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR);
+         }
+     }
+     arg1 = hlsl_ir_constant(expr->operands[0].node);
+@@ -533,40 +576,44 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
+ 
+     switch (expr->op)
+     {
++        case HLSL_OP1_ABS:
++            success = fold_abs(ctx, &res->value, instr->data_type, arg1);
++            break;
++
+         case HLSL_OP1_CAST:
+-            success = fold_cast(ctx, res, arg1);
++            success = fold_cast(ctx, &res->value, instr->data_type, arg1);
+             break;
+ 
+         case HLSL_OP1_NEG:
+-            success = fold_neg(ctx, res, arg1);
++            success = fold_neg(ctx, &res->value, instr->data_type, arg1);
+             break;
+ 
+         case HLSL_OP2_ADD:
+-            success = fold_add(ctx, res, arg1, arg2);
++            success = fold_add(ctx, &res->value, instr->data_type, arg1, arg2);
+             break;
+ 
+         case HLSL_OP2_MUL:
+-            success = fold_mul(ctx, res, arg1, arg2);
++            success = fold_mul(ctx, &res->value, instr->data_type, arg1, arg2);
+             break;
+ 
+         case HLSL_OP2_NEQUAL:
+-            success = fold_nequal(ctx, res, arg1, arg2);
++            success = fold_nequal(ctx, &res->value, instr->data_type, arg1, arg2);
+             break;
+ 
+         case HLSL_OP2_DIV:
+-            success = fold_div(ctx, res, arg1, arg2);
++            success = fold_div(ctx, &res->value, instr->data_type, arg1, arg2, &instr->loc);
+             break;
+ 
+         case HLSL_OP2_MOD:
+-            success = fold_mod(ctx, res, arg1, arg2);
++            success = fold_mod(ctx, &res->value, instr->data_type, arg1, arg2, &instr->loc);
+             break;
+ 
+         case HLSL_OP2_MAX:
+-            success = fold_max(ctx, res, arg1, arg2);
++            success = fold_max(ctx, &res->value, instr->data_type, arg1, arg2);
+             break;
+ 
+         case HLSL_OP2_MIN:
+-            success = fold_min(ctx, res, arg1, arg2);
++            success = fold_min(ctx, &res->value, instr->data_type, arg1, arg2);
+             break;
+ 
+         case HLSL_OP2_BIT_XOR:
+@@ -616,7 +663,7 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst
+         return false;
+ 
+     for (i = 0; i < swizzle->node.data_type->dimx; ++i)
+-        res->value[i] = value->value[hlsl_swizzle_get_component(swizzle->swizzle, i)];
++        res->value.u[i] = value->value.u[hlsl_swizzle_get_component(swizzle->swizzle, i)];
+ 
+     list_add_before(&swizzle->node.entry, &res->node.entry);
+     hlsl_replace_node(&swizzle->node, &res->node);
+diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c
+deleted file mode 100644
+index 4a62d804ed6..00000000000
+--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c
++++ /dev/null
+@@ -1,980 +0,0 @@
+-/*
+- * HLSL code generation for DXBC shader models 1-3
+- *
+- * Copyright 2019-2020 Zebediah Figura for CodeWeavers
+- *
+- * This library is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU Lesser General Public
+- * License as published by the Free Software Foundation; either
+- * version 2.1 of the License, or (at your option) any later version.
+- *
+- * This library is distributed in the hope that it will be useful,
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- * Lesser General Public License for more details.
+- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +- */ +- +-#include "hlsl.h" +-#include +- +-bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) +-{ +- unsigned int i; +- +- static const struct +- { +- const char *semantic; +- bool output; +- enum vkd3d_shader_type shader_type; +- unsigned int major_version; +- D3DSHADER_PARAM_REGISTER_TYPE type; +- DWORD offset; +- } +- register_table[] = +- { +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, +- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, +- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, +- +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, +- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, +- {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, +- {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, +- +- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, +- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, +- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, +- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, +- +- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, +- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, +- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, +- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, +- }; +- +- for (i = 0; i < ARRAY_SIZE(register_table); ++i) +- { +- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) +- && output == register_table[i].output +- && ctx->profile->type == register_table[i].shader_type +- && ctx->profile->major_version == register_table[i].major_version) +- { +- *type = register_table[i].type; +- if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) +- *reg = register_table[i].offset; +- else +- *reg = semantic->index; +- return true; +- } +- } +- +- return false; +-} +- +-bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) +-{ +- static const struct +- { +- const char *name; +- D3DDECLUSAGE usage; +- } +- semantics[] = +- { +- {"binormal", D3DDECLUSAGE_BINORMAL}, +- {"blendindices", D3DDECLUSAGE_BLENDINDICES}, +- {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, +- {"color", D3DDECLUSAGE_COLOR}, +- {"depth", D3DDECLUSAGE_DEPTH}, +- {"fog", 
D3DDECLUSAGE_FOG}, +- {"normal", D3DDECLUSAGE_NORMAL}, +- {"position", D3DDECLUSAGE_POSITION}, +- {"positiont", D3DDECLUSAGE_POSITIONT}, +- {"psize", D3DDECLUSAGE_PSIZE}, +- {"sample", D3DDECLUSAGE_SAMPLE}, +- {"sv_depth", D3DDECLUSAGE_DEPTH}, +- {"sv_position", D3DDECLUSAGE_POSITION}, +- {"sv_target", D3DDECLUSAGE_COLOR}, +- {"tangent", D3DDECLUSAGE_TANGENT}, +- {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, +- {"texcoord", D3DDECLUSAGE_TEXCOORD}, +- }; +- +- unsigned int i; +- +- for (i = 0; i < ARRAY_SIZE(semantics); ++i) +- { +- if (!ascii_strcasecmp(semantic->name, semantics[i].name)) +- { +- *usage = semantics[i].usage; +- *usage_idx = semantic->index; +- return true; +- } +- } +- +- return false; +-} +- +-static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) +-{ +- if (type == VKD3D_SHADER_TYPE_VERTEX) +- return D3DVS_VERSION(major, minor); +- else +- return D3DPS_VERSION(major, minor); +-} +- +-static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) +-{ +- switch (type->type) +- { +- case HLSL_CLASS_ARRAY: +- return sm1_class(type->e.array.type); +- case HLSL_CLASS_MATRIX: +- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); +- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) +- return D3DXPC_MATRIX_COLUMNS; +- else +- return D3DXPC_MATRIX_ROWS; +- case HLSL_CLASS_OBJECT: +- return D3DXPC_OBJECT; +- case HLSL_CLASS_SCALAR: +- return D3DXPC_SCALAR; +- case HLSL_CLASS_STRUCT: +- return D3DXPC_STRUCT; +- case HLSL_CLASS_VECTOR: +- return D3DXPC_VECTOR; +- default: +- ERR("Invalid class %#x.\n", type->type); +- vkd3d_unreachable(); +- } +-} +- +-static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) +-{ +- switch (type->base_type) +- { +- case HLSL_TYPE_BOOL: +- return D3DXPT_BOOL; +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- return D3DXPT_FLOAT; +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- return D3DXPT_INT; +- case HLSL_TYPE_PIXELSHADER: +- return D3DXPT_PIXELSHADER; +- case HLSL_TYPE_SAMPLER: +- switch (type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_1D: +- return D3DXPT_SAMPLER1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3DXPT_SAMPLER2D; +- case HLSL_SAMPLER_DIM_3D: +- return D3DXPT_SAMPLER3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return D3DXPT_SAMPLERCUBE; +- case HLSL_SAMPLER_DIM_GENERIC: +- return D3DXPT_SAMPLER; +- default: +- ERR("Invalid dimension %#x.\n", type->sampler_dim); +- vkd3d_unreachable(); +- } +- break; +- case HLSL_TYPE_STRING: +- return D3DXPT_STRING; +- case HLSL_TYPE_TEXTURE: +- switch (type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_1D: +- return D3DXPT_TEXTURE1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3DXPT_TEXTURE2D; +- case HLSL_SAMPLER_DIM_3D: +- return D3DXPT_TEXTURE3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return D3DXPT_TEXTURECUBE; +- case HLSL_SAMPLER_DIM_GENERIC: +- return D3DXPT_TEXTURE; +- default: +- ERR("Invalid dimension %#x.\n", type->sampler_dim); +- vkd3d_unreachable(); +- } +- break; +- case HLSL_TYPE_VERTEXSHADER: +- return D3DXPT_VERTEXSHADER; +- case HLSL_TYPE_VOID: +- return D3DXPT_VOID; +- default: +- vkd3d_unreachable(); +- } +-} +- +-static const struct hlsl_type *get_array_type(const struct hlsl_type *type) +-{ +- if (type->type == HLSL_CLASS_ARRAY) +- return get_array_type(type->e.array.type); +- return type; +-} +- +-static unsigned int get_array_size(const struct hlsl_type *type) +-{ +- if (type->type == HLSL_CLASS_ARRAY) +- return get_array_size(type->e.array.type) * type->e.array.elements_count; +- return 1; +-} +- +-static void write_sm1_type(struct 
vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) +-{ +- const struct hlsl_type *array_type = get_array_type(type); +- unsigned int array_size = get_array_size(type); +- unsigned int field_count = 0; +- size_t fields_offset = 0; +- size_t i; +- +- if (type->bytecode_offset) +- return; +- +- if (array_type->type == HLSL_CLASS_STRUCT) +- { +- field_count = array_type->e.record.field_count; +- +- for (i = 0; i < field_count; ++i) +- { +- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; +- +- field->name_bytecode_offset = put_string(buffer, field->name); +- write_sm1_type(buffer, field->type, ctab_start); +- } +- +- fields_offset = bytecode_get_size(buffer) - ctab_start; +- +- for (i = 0; i < field_count; ++i) +- { +- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; +- +- put_u32(buffer, field->name_bytecode_offset - ctab_start); +- put_u32(buffer, field->type->bytecode_offset - ctab_start); +- } +- } +- +- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); +- put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); +- put_u32(buffer, vkd3d_make_u32(array_size, field_count)); +- put_u32(buffer, fields_offset); +-} +- +-static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) +-{ +- struct hlsl_ir_var *var; +- +- list_remove(&to_sort->extern_entry); +- +- LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) +- { +- if (strcmp(to_sort->name, var->name) < 0) +- { +- list_add_before(&var->extern_entry, &to_sort->extern_entry); +- return; +- } +- } +- +- list_add_tail(sorted, &to_sort->extern_entry); +-} +- +-static void sm1_sort_externs(struct hlsl_ctx *ctx) +-{ +- struct list sorted = LIST_INIT(sorted); +- struct hlsl_ir_var *var, *next; +- +- LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- sm1_sort_extern(&sorted, var); +- list_move_tail(&ctx->extern_vars, &sorted); +-} +- +-static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- struct hlsl_ir_function_decl *entry_func) +-{ +- size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; +- unsigned int uniform_count = 0; +- struct hlsl_ir_var *var; +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); +- +- if (!var->semantic.name && var->regs[regset].allocated) +- { +- ++uniform_count; +- +- if (var->is_param && var->is_uniform) +- { +- struct vkd3d_string_buffer *name; +- +- if (!(name = hlsl_get_string_buffer(ctx))) +- { +- buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; +- return; +- } +- vkd3d_string_buffer_printf(name, "$%s", var->name); +- vkd3d_free((char *)var->name); +- var->name = hlsl_strdup(ctx, name->buffer); +- hlsl_release_string_buffer(ctx, name); +- } +- } +- } +- +- sm1_sort_externs(ctx); +- +- size_offset = put_u32(buffer, 0); +- ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); +- +- ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); +- creator_offset = put_u32(buffer, 0); +- put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); +- put_u32(buffer, uniform_count); +- put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ +- put_u32(buffer, 0); /* FIXME: flags */ +- put_u32(buffer, 0); /* FIXME: target string */ +- +- vars_start = bytecode_get_size(buffer); +- +- 
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); +- +- if (!var->semantic.name && var->regs[regset].allocated) +- { +- put_u32(buffer, 0); /* name */ +- if (var->data_type->type == HLSL_CLASS_OBJECT +- && (var->data_type->base_type == HLSL_TYPE_SAMPLER +- || var->data_type->base_type == HLSL_TYPE_TEXTURE)) +- { +- assert(regset == HLSL_REGSET_SAMPLERS); +- put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); +- put_u32(buffer, 1); +- } +- else +- { +- assert(regset == HLSL_REGSET_NUMERIC); +- put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); +- put_u32(buffer, var->data_type->reg_size[regset] / 4); +- } +- put_u32(buffer, 0); /* type */ +- put_u32(buffer, 0); /* FIXME: default value */ +- } +- } +- +- uniform_count = 0; +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); +- +- if (!var->semantic.name && var->regs[regset].allocated) +- { +- size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); +- size_t name_offset; +- +- name_offset = put_string(buffer, var->name); +- set_u32(buffer, var_offset, name_offset - ctab_start); +- +- write_sm1_type(buffer, var->data_type, ctab_start); +- set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); +- ++uniform_count; +- } +- } +- +- offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); +- set_u32(buffer, creator_offset, offset - ctab_start); +- +- ctab_end = bytecode_get_size(buffer); +- set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); +-} +- +-static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) +-{ +- return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) +- | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); +-} +- +-struct sm1_instruction +-{ +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; +- +- struct sm1_dst_register +- { +- D3DSHADER_PARAM_REGISTER_TYPE type; +- D3DSHADER_PARAM_DSTMOD_TYPE mod; +- unsigned int writemask; +- uint32_t reg; +- } dst; +- +- struct sm1_src_register +- { +- D3DSHADER_PARAM_REGISTER_TYPE type; +- D3DSHADER_PARAM_SRCMOD_TYPE mod; +- unsigned int swizzle; +- uint32_t reg; +- } srcs[3]; +- unsigned int src_count; +- +- unsigned int has_dst; +-}; +- +-static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) +-{ +- assert(reg->writemask); +- put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); +-} +- +-static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, +- const struct sm1_src_register *reg) +-{ +- put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); +-} +- +-static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct sm1_instruction *instr) +-{ +- uint32_t token = instr->opcode; +- unsigned int i; +- +- if (ctx->profile->major_version > 1) +- token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; +- put_u32(buffer, token); +- +- if (instr->has_dst) +- write_sm1_dst_register(buffer, &instr->dst); +- +- for (i = 0; i < instr->src_count; ++i) +- write_sm1_src_register(buffer, &instr->srcs[i]); +-}; +- +-static void sm1_map_src_swizzle(struct sm1_src_register *src, 
unsigned int map_writemask) +-{ +- src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); +-} +- +-static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, +- const struct hlsl_reg *src3) +-{ +- struct sm1_instruction instr = +- { +- .opcode = D3DSIO_DP2ADD, +- +- .dst.type = D3DSPR_TEMP, +- .dst.writemask = dst->writemask, +- .dst.reg = dst->id, +- .has_dst = 1, +- +- .srcs[0].type = D3DSPR_TEMP, +- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), +- .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, +- .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), +- .srcs[1].reg = src2->id, +- .srcs[2].type = D3DSPR_TEMP, +- .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), +- .srcs[2].reg = src3->id, +- .src_count = 3, +- }; +- +- write_sm1_instruction(ctx, buffer, &instr); +-} +- +-static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, +- const struct hlsl_reg *src1, const struct hlsl_reg *src2) +-{ +- struct sm1_instruction instr = +- { +- .opcode = opcode, +- +- .dst.type = D3DSPR_TEMP, +- .dst.writemask = dst->writemask, +- .dst.reg = dst->id, +- .has_dst = 1, +- +- .srcs[0].type = D3DSPR_TEMP, +- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), +- .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, +- .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), +- .srcs[1].reg = src2->id, +- .src_count = 2, +- }; +- +- sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); +- sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &instr); +-} +- +-static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, +- const struct hlsl_reg *src1, const struct hlsl_reg *src2) +-{ +- struct sm1_instruction instr = +- { +- .opcode = opcode, +- +- .dst.type = D3DSPR_TEMP, +- .dst.writemask = dst->writemask, +- .dst.reg = dst->id, +- .has_dst = 1, +- +- .srcs[0].type = D3DSPR_TEMP, +- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), +- .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, +- .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), +- .srcs[1].reg = src2->id, +- .src_count = 2, +- }; +- +- write_sm1_instruction(ctx, buffer, &instr); +-} +- +-static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, +- const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) +-{ +- struct sm1_instruction instr = +- { +- .opcode = opcode, +- +- .dst.type = D3DSPR_TEMP, +- .dst.mod = dst_mod, +- .dst.writemask = dst->writemask, +- .dst.reg = dst->id, +- .has_dst = 1, +- +- .srcs[0].type = D3DSPR_TEMP, +- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), +- .srcs[0].reg = src->id, +- .srcs[0].mod = src_mod, +- .src_count = 1, +- }; +- +- sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &instr); +-} +- +-static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +-{ +- unsigned int i, x; +- +- for (i = 0; i < ctx->constant_defs.count; ++i) +- { +- uint32_t token = D3DSIO_DEF; +- 
const struct sm1_dst_register reg = +- { +- .type = D3DSPR_CONST, +- .writemask = VKD3DSP_WRITEMASK_ALL, +- .reg = i, +- }; +- +- if (ctx->profile->major_version > 1) +- token |= 5 << D3DSI_INSTLENGTH_SHIFT; +- put_u32(buffer, token); +- +- write_sm1_dst_register(buffer, ®); +- for (x = 0; x < 4; ++x) +- put_f32(buffer, ctx->constant_defs.values[i].f[x]); +- } +-} +- +-static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_var *var, bool output) +-{ +- struct sm1_dst_register reg = {0}; +- uint32_t token, usage_idx; +- D3DDECLUSAGE usage; +- bool ret; +- +- if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) +- { +- usage = 0; +- usage_idx = 0; +- } +- else +- { +- ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); +- assert(ret); +- reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; +- reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; +- } +- +- token = D3DSIO_DCL; +- if (ctx->profile->major_version > 1) +- token |= 2 << D3DSI_INSTLENGTH_SHIFT; +- put_u32(buffer, token); +- +- token = (1u << 31); +- token |= usage << D3DSP_DCL_USAGE_SHIFT; +- token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; +- put_u32(buffer, token); +- +- reg.writemask = (1 << var->data_type->dimx) - 1; +- write_sm1_dst_register(buffer, ®); +-} +- +-static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +-{ +- bool write_in = false, write_out = false; +- struct hlsl_ir_var *var; +- +- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) +- write_in = true; +- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) +- write_in = write_out = true; +- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) +- write_in = true; +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if (write_in && var->is_input_semantic) +- write_sm1_semantic_dcl(ctx, buffer, var, false); +- if (write_out && var->is_output_semantic) +- write_sm1_semantic_dcl(ctx, buffer, var, true); +- } +-} +- +-static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) +-{ +- const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); +- struct sm1_instruction sm1_instr = +- { +- .opcode = D3DSIO_MOV, +- +- .dst.type = D3DSPR_TEMP, +- .dst.reg = instr->reg.id, +- .dst.writemask = instr->reg.writemask, +- .has_dst = 1, +- +- .srcs[0].type = D3DSPR_CONST, +- .srcs[0].reg = constant->reg.id, +- .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), +- .src_count = 1, +- }; +- +- assert(instr->reg.allocated); +- assert(constant->reg.allocated); +- sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); +-} +- +-static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) +-{ +- struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); +- struct hlsl_ir_node *arg1 = expr->operands[0].node; +- unsigned int i; +- +- for (i = 0; i < instr->data_type->dimx; ++i) +- { +- struct hlsl_reg src = arg1->reg, dst = instr->reg; +- +- src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); +- dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); +- write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); +- } +-} +- +-static void 
write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +-{ +- struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); +- struct hlsl_ir_node *arg1 = expr->operands[0].node; +- struct hlsl_ir_node *arg2 = expr->operands[1].node; +- struct hlsl_ir_node *arg3 = expr->operands[2].node; +- +- assert(instr->reg.allocated); +- +- if (instr->data_type->base_type != HLSL_TYPE_FLOAT) +- { +- /* These need to be lowered. */ +- hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); +- return; +- } +- +- switch (expr->op) +- { +- case HLSL_OP1_ABS: +- write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); +- break; +- +- case HLSL_OP1_EXP2: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); +- break; +- +- case HLSL_OP1_NEG: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); +- break; +- +- case HLSL_OP1_SAT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); +- break; +- +- case HLSL_OP1_RCP: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); +- break; +- +- case HLSL_OP1_RSQ: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); +- break; +- +- case HLSL_OP2_ADD: +- write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); +- break; +- +- case HLSL_OP2_MAX: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); +- break; +- +- case HLSL_OP2_MIN: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); +- break; +- +- case HLSL_OP2_MUL: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); +- break; +- +- case HLSL_OP1_FRACT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); +- break; +- +- case HLSL_OP2_DOT: +- switch (arg1->data_type->dimx) +- { +- case 4: +- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); +- break; +- +- case 3: +- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); +- break; +- +- default: +- vkd3d_unreachable(); +- } +- break; +- +- case HLSL_OP3_DP2ADD: +- write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); +- break; +- +- default: +- hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); +- break; +- } +-} +- +-static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +-{ +- const struct hlsl_ir_load *load = hlsl_ir_load(instr); +- const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); +- struct sm1_instruction sm1_instr = +- { +- .opcode = D3DSIO_MOV, +- +- .dst.type = D3DSPR_TEMP, +- .dst.reg = instr->reg.id, +- .dst.writemask = instr->reg.writemask, +- .has_dst = 1, +- +- .srcs[0].type = D3DSPR_TEMP, +- .srcs[0].reg = reg.id, +- .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), +- .src_count = 1, +- }; +- +- assert(instr->reg.allocated); +- +- if (load->src.var->is_uniform) +- { +- assert(reg.allocated); +- sm1_instr.srcs[0].type = D3DSPR_CONST; +- } +- else if (load->src.var->is_input_semantic) +- { +- if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, +- false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) +- { +- assert(reg.allocated); +- sm1_instr.srcs[0].type = D3DSPR_INPUT; +- sm1_instr.srcs[0].reg = reg.id; +- } +- else +- sm1_instr.srcs[0].swizzle = 
hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); +- } +- +- sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); +-} +- +-static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) +-{ +- const struct hlsl_ir_store *store = hlsl_ir_store(instr); +- const struct hlsl_ir_node *rhs = store->rhs.node; +- const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); +- struct sm1_instruction sm1_instr = +- { +- .opcode = D3DSIO_MOV, +- +- .dst.type = D3DSPR_TEMP, +- .dst.reg = reg.id, +- .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), +- .has_dst = 1, +- +- .srcs[0].type = D3DSPR_TEMP, +- .srcs[0].reg = rhs->reg.id, +- .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), +- .src_count = 1, +- }; +- +- if (store->lhs.var->data_type->type == HLSL_CLASS_MATRIX) +- { +- FIXME("Matrix writemasks need to be lowered.\n"); +- return; +- } +- +- if (store->lhs.var->is_output_semantic) +- { +- if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, +- true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) +- { +- assert(reg.allocated); +- sm1_instr.dst.type = D3DSPR_OUTPUT; +- sm1_instr.dst.reg = reg.id; +- } +- else +- sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; +- } +- else +- assert(reg.allocated); +- +- sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); +-} +- +-static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) +-{ +- const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); +- const struct hlsl_ir_node *val = swizzle->val.node; +- struct sm1_instruction sm1_instr = +- { +- .opcode = D3DSIO_MOV, +- +- .dst.type = D3DSPR_TEMP, +- .dst.reg = instr->reg.id, +- .dst.writemask = instr->reg.writemask, +- .has_dst = 1, +- +- .srcs[0].type = D3DSPR_TEMP, +- .srcs[0].reg = val->reg.id, +- .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), +- swizzle->swizzle, instr->data_type->dimx), +- .src_count = 1, +- }; +- +- assert(instr->reg.allocated); +- assert(val->reg.allocated); +- sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); +-} +- +-static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_function_decl *entry_func) +-{ +- const struct hlsl_ir_node *instr; +- +- LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) +- { +- if (instr->data_type) +- { +- if (instr->data_type->type == HLSL_CLASS_MATRIX) +- { +- /* These need to be lowered. 
*/ +- hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); +- continue; +- } +- else if (instr->data_type->type == HLSL_CLASS_OBJECT) +- { +- hlsl_fixme(ctx, &instr->loc, "Object copy."); +- break; +- } +- +- assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR); +- } +- +- switch (instr->type) +- { +- case HLSL_IR_CALL: +- vkd3d_unreachable(); +- +- case HLSL_IR_CONSTANT: +- write_sm1_constant(ctx, buffer, instr); +- break; +- +- case HLSL_IR_EXPR: +- write_sm1_expr(ctx, buffer, instr); +- break; +- +- case HLSL_IR_LOAD: +- write_sm1_load(ctx, buffer, instr); +- break; +- +- case HLSL_IR_STORE: +- write_sm1_store(ctx, buffer, instr); +- break; +- +- case HLSL_IR_SWIZZLE: +- write_sm1_swizzle(ctx, buffer, instr); +- break; +- +- default: +- hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); +- } +- } +-} +- +-int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +-{ +- struct vkd3d_bytecode_buffer buffer = {0}; +- int ret; +- +- put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); +- +- write_sm1_uniforms(ctx, &buffer, entry_func); +- +- write_sm1_constant_defs(ctx, &buffer); +- write_sm1_semantic_dcls(ctx, &buffer); +- write_sm1_instructions(ctx, &buffer, entry_func); +- +- put_u32(&buffer, D3DSIO_END); +- +- if (!(ret = buffer.status)) +- { +- out->code = buffer.data; +- out->size = buffer.size; +- } +- return ret; +-} +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c +deleted file mode 100644 +index 553a75818e7..00000000000 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c ++++ /dev/null +@@ -1,2531 +0,0 @@ +-/* +- * HLSL code generation for DXBC shader models 4-5 +- * +- * Copyright 2019-2020 Zebediah Figura for CodeWeavers +- * +- * This library is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2.1 of the License, or (at your option) any later version. +- * +- * This library is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Lesser General Public License for more details. 
+- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +- */ +- +-#include "hlsl.h" +-#include +-#include "d3dcommon.h" +-#include "sm4.h" +- +-static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); +- +-static bool type_is_integer(const struct hlsl_type *type) +-{ +- switch (type->base_type) +- { +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- return true; +- +- default: +- return false; +- } +-} +- +-bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) +-{ +- unsigned int i; +- +- static const struct +- { +- const char *semantic; +- bool output; +- enum vkd3d_shader_type shader_type; +- enum vkd3d_sm4_swizzle_type swizzle_type; +- enum vkd3d_sm4_register_type type; +- bool has_idx; +- } +- register_table[] = +- { +- {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, +- {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, +- {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, +- +- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, +- +- /* Put sv_target in this table, instead of letting it fall through to +- * default varying allocation, so that the register index matches the +- * usage index. */ +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, +- }; +- +- for (i = 0; i < ARRAY_SIZE(register_table); ++i) +- { +- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) +- && output == register_table[i].output +- && ctx->profile->type == register_table[i].shader_type) +- { +- *type = register_table[i].type; +- if (swizzle_type) +- *swizzle_type = register_table[i].swizzle_type; +- *has_idx = register_table[i].has_idx; +- return true; +- } +- } +- +- return false; +-} +- +-bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, D3D_NAME *usage) +-{ +- unsigned int i; +- +- static const struct +- { +- const char *name; +- bool output; +- enum vkd3d_shader_type shader_type; +- D3DDECLUSAGE usage; +- } +- semantics[] = +- { +- {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, +- {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, +- {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, +- +- {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, +- {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, +- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, +- +- {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, +- {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, +- {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, 
D3D_NAME_PRIMITIVE_ID}, +- +- {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, +- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, +- +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, +- +- {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, +- {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, +- +- {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, +- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, +- }; +- +- for (i = 0; i < ARRAY_SIZE(semantics); ++i) +- { +- if (!ascii_strcasecmp(semantic->name, semantics[i].name) +- && output == semantics[i].output +- && ctx->profile->type == semantics[i].shader_type +- && !ascii_strncasecmp(semantic->name, "sv_", 3)) +- { +- *usage = semantics[i].usage; +- return true; +- } +- } +- +- if (!ascii_strncasecmp(semantic->name, "sv_", 3)) +- return false; +- +- *usage = D3D_NAME_UNDEFINED; +- return true; +-} +- +-static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) +-{ +- struct vkd3d_bytecode_buffer buffer = {0}; +- struct vkd3d_string_buffer *string; +- const struct hlsl_ir_var *var; +- size_t count_position; +- unsigned int i; +- bool ret; +- +- count_position = put_u32(&buffer, 0); +- put_u32(&buffer, 8); /* unknown */ +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; +- enum vkd3d_sm4_register_type type; +- uint32_t usage_idx, reg_idx; +- D3D_NAME usage; +- bool has_idx; +- +- if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) +- continue; +- +- ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); +- assert(ret); +- if (usage == ~0u) +- continue; +- usage_idx = var->semantic.index; +- +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) +- { +- reg_idx = has_idx ? var->semantic.index : ~0u; +- } +- else +- { +- assert(var->regs[HLSL_REGSET_NUMERIC].allocated); +- type = VKD3D_SM4_RT_INPUT; +- reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; +- } +- +- use_mask = width; /* FIXME: accurately report use mask */ +- if (output) +- use_mask = 0xf ^ use_mask; +- +- /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). 
*/ +- if (usage >= 64) +- usage = 0; +- +- put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, usage_idx); +- put_u32(&buffer, usage); +- switch (var->data_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); +- break; +- +- case HLSL_TYPE_INT: +- put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_UINT: +- put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); +- break; +- +- default: +- if ((string = hlsl_type_to_string(ctx, var->data_type))) +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Invalid data type %s for semantic variable %s.", string->buffer, var->name); +- hlsl_release_string_buffer(ctx, string); +- put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); +- } +- put_u32(&buffer, reg_idx); +- put_u32(&buffer, vkd3d_make_u16(width, use_mask)); +- } +- +- i = 0; +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- const char *semantic = var->semantic.name; +- size_t string_offset; +- D3D_NAME usage; +- +- if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) +- continue; +- +- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); +- if (usage == ~0u) +- continue; +- +- if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) +- string_offset = put_string(&buffer, "SV_Target"); +- else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) +- string_offset = put_string(&buffer, "SV_Depth"); +- else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) +- string_offset = put_string(&buffer, "SV_Position"); +- else +- string_offset = put_string(&buffer, semantic); +- set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); +- } +- +- set_u32(&buffer, count_position, i); +- +- dxbc_writer_add_section(dxbc, output ? 
TAG_OSGN : TAG_ISGN, buffer.data, buffer.size); +-} +- +-static const struct hlsl_type *get_array_type(const struct hlsl_type *type) +-{ +- if (type->type == HLSL_CLASS_ARRAY) +- return get_array_type(type->e.array.type); +- return type; +-} +- +-static unsigned int get_array_size(const struct hlsl_type *type) +-{ +- if (type->type == HLSL_CLASS_ARRAY) +- return get_array_size(type->e.array.type) * type->e.array.elements_count; +- return 1; +-} +- +-static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +-{ +- switch (type->type) +- { +- case HLSL_CLASS_ARRAY: +- return sm4_class(type->e.array.type); +- case HLSL_CLASS_MATRIX: +- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); +- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) +- return D3D_SVC_MATRIX_COLUMNS; +- else +- return D3D_SVC_MATRIX_ROWS; +- case HLSL_CLASS_OBJECT: +- return D3D_SVC_OBJECT; +- case HLSL_CLASS_SCALAR: +- return D3D_SVC_SCALAR; +- case HLSL_CLASS_STRUCT: +- return D3D_SVC_STRUCT; +- case HLSL_CLASS_VECTOR: +- return D3D_SVC_VECTOR; +- default: +- ERR("Invalid class %#x.\n", type->type); +- vkd3d_unreachable(); +- } +-} +- +-static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) +-{ +- switch (type->base_type) +- { +- case HLSL_TYPE_BOOL: +- return D3D_SVT_BOOL; +- case HLSL_TYPE_DOUBLE: +- return D3D_SVT_DOUBLE; +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- return D3D_SVT_FLOAT; +- case HLSL_TYPE_INT: +- return D3D_SVT_INT; +- case HLSL_TYPE_PIXELSHADER: +- return D3D_SVT_PIXELSHADER; +- case HLSL_TYPE_SAMPLER: +- switch (type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_1D: +- return D3D_SVT_SAMPLER1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3D_SVT_SAMPLER2D; +- case HLSL_SAMPLER_DIM_3D: +- return D3D_SVT_SAMPLER3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return D3D_SVT_SAMPLERCUBE; +- case HLSL_SAMPLER_DIM_GENERIC: +- return D3D_SVT_SAMPLER; +- default: +- vkd3d_unreachable(); +- } +- break; +- case HLSL_TYPE_STRING: +- return D3D_SVT_STRING; +- case HLSL_TYPE_TEXTURE: +- switch (type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_1D: +- return D3D_SVT_TEXTURE1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3D_SVT_TEXTURE2D; +- case HLSL_SAMPLER_DIM_3D: +- return D3D_SVT_TEXTURE3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return D3D_SVT_TEXTURECUBE; +- case HLSL_SAMPLER_DIM_GENERIC: +- return D3D_SVT_TEXTURE; +- default: +- vkd3d_unreachable(); +- } +- break; +- case HLSL_TYPE_UINT: +- return D3D_SVT_UINT; +- case HLSL_TYPE_VERTEXSHADER: +- return D3D_SVT_VERTEXSHADER; +- case HLSL_TYPE_VOID: +- return D3D_SVT_VOID; +- default: +- vkd3d_unreachable(); +- } +-} +- +-static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) +-{ +- const struct hlsl_type *array_type = get_array_type(type); +- const char *name = array_type->name ? 
array_type->name : ""; +- const struct hlsl_profile_info *profile = ctx->profile; +- unsigned int field_count = 0, array_size = 0; +- size_t fields_offset = 0, name_offset = 0; +- size_t i; +- +- if (type->bytecode_offset) +- return; +- +- if (profile->major_version >= 5) +- name_offset = put_string(buffer, name); +- +- if (type->type == HLSL_CLASS_ARRAY) +- array_size = get_array_size(type); +- +- if (array_type->type == HLSL_CLASS_STRUCT) +- { +- field_count = array_type->e.record.field_count; +- +- for (i = 0; i < field_count; ++i) +- { +- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; +- +- field->name_bytecode_offset = put_string(buffer, field->name); +- write_sm4_type(ctx, buffer, field->type); +- } +- +- fields_offset = bytecode_get_size(buffer); +- +- for (i = 0; i < field_count; ++i) +- { +- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; +- +- put_u32(buffer, field->name_bytecode_offset); +- put_u32(buffer, field->type->bytecode_offset); +- put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); +- } +- } +- +- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); +- put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); +- put_u32(buffer, vkd3d_make_u32(array_size, field_count)); +- put_u32(buffer, fields_offset); +- +- if (profile->major_version >= 5) +- { +- put_u32(buffer, 0); /* FIXME: unknown */ +- put_u32(buffer, 0); /* FIXME: unknown */ +- put_u32(buffer, 0); /* FIXME: unknown */ +- put_u32(buffer, 0); /* FIXME: unknown */ +- put_u32(buffer, name_offset); +- } +-} +- +-static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) +-{ +- switch (type->base_type) +- { +- case HLSL_TYPE_SAMPLER: +- return D3D_SIT_SAMPLER; +- case HLSL_TYPE_TEXTURE: +- return D3D_SIT_TEXTURE; +- case HLSL_TYPE_UAV: +- return D3D_SIT_UAV_RWTYPED; +- default: +- vkd3d_unreachable(); +- } +-} +- +-static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) +-{ +- switch (type->e.resource_format->base_type) +- { +- case HLSL_TYPE_DOUBLE: +- return D3D_RETURN_TYPE_DOUBLE; +- +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- return D3D_RETURN_TYPE_FLOAT; +- +- case HLSL_TYPE_INT: +- return D3D_RETURN_TYPE_SINT; +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_UINT: +- return D3D_RETURN_TYPE_UINT; +- +- default: +- vkd3d_unreachable(); +- } +-} +- +-static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) +-{ +- switch (type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_1D: +- return D3D_SRV_DIMENSION_TEXTURE1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3D_SRV_DIMENSION_TEXTURE2D; +- case HLSL_SAMPLER_DIM_3D: +- return D3D_SRV_DIMENSION_TEXTURE3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return D3D_SRV_DIMENSION_TEXTURECUBE; +- case HLSL_SAMPLER_DIM_1DARRAY: +- return D3D_SRV_DIMENSION_TEXTURE1DARRAY; +- case HLSL_SAMPLER_DIM_2DARRAY: +- return D3D_SRV_DIMENSION_TEXTURE2DARRAY; +- case HLSL_SAMPLER_DIM_2DMS: +- return D3D_SRV_DIMENSION_TEXTURE2DMS; +- case HLSL_SAMPLER_DIM_2DMSARRAY: +- return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; +- case HLSL_SAMPLER_DIM_CUBEARRAY: +- return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; +- default: +- vkd3d_unreachable(); +- } +-} +- +-static int sm4_compare_extern_resources(const void *a, const void *b) +-{ +- const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; +- const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; +- enum hlsl_regset aa_regset, bb_regset; +- +- aa_regset = hlsl_type_get_regset(aa->data_type); 
+- bb_regset = hlsl_type_get_regset(bb->data_type); +- +- if (aa_regset != bb_regset) +- return aa_regset - bb_regset; +- +- return aa->regs[aa_regset].id - bb->regs[bb_regset].id; +-} +- +-static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) +-{ +- const struct hlsl_ir_var **extern_resources = NULL; +- const struct hlsl_ir_var *var; +- enum hlsl_regset regset; +- size_t capacity = 0; +- +- *count = 0; +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if (!hlsl_type_is_resource(var->data_type)) +- continue; +- regset = hlsl_type_get_regset(var->data_type); +- if (!var->regs[regset].allocated) +- continue; +- +- if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, +- sizeof(*extern_resources)))) +- { +- *count = 0; +- return NULL; +- } +- +- extern_resources[*count] = var; +- ++*count; +- } +- +- qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); +- return extern_resources; +-} +- +-static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +-{ +- unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; +- size_t cbuffers_offset, resources_offset, creator_offset, string_offset; +- size_t cbuffer_position, resource_position, creator_position; +- const struct hlsl_profile_info *profile = ctx->profile; +- const struct hlsl_ir_var **extern_resources; +- struct vkd3d_bytecode_buffer buffer = {0}; +- const struct hlsl_buffer *cbuffer; +- const struct hlsl_ir_var *var; +- +- static const uint16_t target_types[] = +- { +- 0xffff, /* PIXEL */ +- 0xfffe, /* VERTEX */ +- 0x4753, /* GEOMETRY */ +- 0x4853, /* HULL */ +- 0x4453, /* DOMAIN */ +- 0x4353, /* COMPUTE */ +- }; +- +- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); +- +- resource_count += extern_resources_count; +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- if (cbuffer->reg.allocated) +- { +- ++cbuffer_count; +- ++resource_count; +- } +- } +- +- put_u32(&buffer, cbuffer_count); +- cbuffer_position = put_u32(&buffer, 0); +- put_u32(&buffer, resource_count); +- resource_position = put_u32(&buffer, 0); +- put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), +- target_types[profile->type])); +- put_u32(&buffer, 0); /* FIXME: compilation flags */ +- creator_position = put_u32(&buffer, 0); +- +- if (profile->major_version >= 5) +- { +- put_u32(&buffer, TAG_RD11); +- put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ +- put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ +- put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ +- put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ +- put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ +- put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ +- put_u32(&buffer, 0); /* unknown; possibly a null terminator */ +- } +- +- /* Bound resources. 
*/ +- +- resources_offset = bytecode_get_size(&buffer); +- set_u32(&buffer, resource_position, resources_offset); +- +- for (i = 0; i < extern_resources_count; ++i) +- { +- enum hlsl_regset regset; +- uint32_t flags = 0; +- +- var = extern_resources[i]; +- regset = hlsl_type_get_regset(var->data_type); +- +- if (var->reg_reservation.type) +- flags |= D3D_SIF_USERPACKED; +- +- put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, sm4_resource_type(var->data_type)); +- if (regset == HLSL_REGSET_SAMPLERS) +- { +- put_u32(&buffer, 0); +- put_u32(&buffer, 0); +- put_u32(&buffer, 0); +- } +- else +- { +- put_u32(&buffer, sm4_resource_format(var->data_type)); +- put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); +- put_u32(&buffer, ~0u); /* FIXME: multisample count */ +- flags |= (var->data_type->e.resource_format->dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; +- } +- put_u32(&buffer, var->regs[regset].id); +- put_u32(&buffer, 1); /* bind count */ +- put_u32(&buffer, flags); +- } +- +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- uint32_t flags = 0; +- +- if (!cbuffer->reg.allocated) +- continue; +- +- if (cbuffer->reservation.type) +- flags |= D3D_SIF_USERPACKED; +- +- put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); +- put_u32(&buffer, 0); /* return type */ +- put_u32(&buffer, 0); /* dimension */ +- put_u32(&buffer, 0); /* multisample count */ +- put_u32(&buffer, cbuffer->reg.id); /* bind point */ +- put_u32(&buffer, 1); /* bind count */ +- put_u32(&buffer, flags); /* flags */ +- } +- +- for (i = 0; i < extern_resources_count; ++i) +- { +- var = extern_resources[i]; +- +- string_offset = put_string(&buffer, var->name); +- set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); +- } +- +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- if (!cbuffer->reg.allocated) +- continue; +- +- string_offset = put_string(&buffer, cbuffer->name); +- set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); +- } +- +- /* Buffers. */ +- +- cbuffers_offset = bytecode_get_size(&buffer); +- set_u32(&buffer, cbuffer_position, cbuffers_offset); +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- unsigned int var_count = 0; +- +- if (!cbuffer->reg.allocated) +- continue; +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if (var->is_uniform && var->buffer == cbuffer) +- ++var_count; +- } +- +- put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, var_count); +- put_u32(&buffer, 0); /* variable offset */ +- put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); +- put_u32(&buffer, 0); /* FIXME: flags */ +- put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_CT_CBUFFER : D3D_CT_TBUFFER); +- } +- +- i = 0; +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- if (!cbuffer->reg.allocated) +- continue; +- +- string_offset = put_string(&buffer, cbuffer->name); +- set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); +- } +- +- i = 0; +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- size_t vars_start = bytecode_get_size(&buffer); +- +- if (!cbuffer->reg.allocated) +- continue; +- +- set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if (var->is_uniform && var->buffer == cbuffer) +- { +- uint32_t flags = 0; +- +- if (var->last_read) +- flags |= D3D_SVF_USED; +- +- put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, var->buffer_offset * sizeof(float)); +- put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); +- put_u32(&buffer, flags); +- put_u32(&buffer, 0); /* type */ +- put_u32(&buffer, 0); /* FIXME: default value */ +- +- if (profile->major_version >= 5) +- { +- put_u32(&buffer, 0); /* texture start */ +- put_u32(&buffer, 0); /* texture count */ +- put_u32(&buffer, 0); /* sampler start */ +- put_u32(&buffer, 0); /* sampler count */ +- } +- } +- } +- +- j = 0; +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if (var->is_uniform && var->buffer == cbuffer) +- { +- const unsigned int var_size = (profile->major_version >= 5 ? 10 : 6); +- size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); +- size_t string_offset = put_string(&buffer, var->name); +- +- set_u32(&buffer, var_offset, string_offset); +- write_sm4_type(ctx, &buffer, var->data_type); +- set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); +- ++j; +- } +- } +- } +- +- creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); +- set_u32(&buffer, creator_position, creator_offset); +- +- dxbc_writer_add_section(dxbc, TAG_RDEF, buffer.data, buffer.size); +- +- vkd3d_free(extern_resources); +-} +- +-static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) +-{ +- switch (type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_1D: +- return VKD3D_SM4_RESOURCE_TEXTURE_1D; +- case HLSL_SAMPLER_DIM_2D: +- return VKD3D_SM4_RESOURCE_TEXTURE_2D; +- case HLSL_SAMPLER_DIM_3D: +- return VKD3D_SM4_RESOURCE_TEXTURE_3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; +- case HLSL_SAMPLER_DIM_1DARRAY: +- return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; +- case HLSL_SAMPLER_DIM_2DARRAY: +- return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; +- case HLSL_SAMPLER_DIM_2DMS: +- return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; +- case HLSL_SAMPLER_DIM_2DMSARRAY: +- return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; +- case HLSL_SAMPLER_DIM_CUBEARRAY: +- return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; +- default: +- vkd3d_unreachable(); +- } +-} +- +-struct sm4_instruction_modifier +-{ +- enum vkd3d_sm4_instruction_modifier type; +- +- union +- { +- struct +- { +- int u, v, w; +- } aoffimmi; +- } u; +-}; +- +-static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod) +-{ +- uint32_t word = 0; +- +- word |= VKD3D_SM4_MODIFIER_MASK & imod->type; +- +- switch (imod->type) +- { +- case VKD3D_SM4_MODIFIER_AOFFIMMI: +- assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); +- assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v 
<= 7); +- assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); +- word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; +- word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; +- word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; +- break; +- +- default: +- vkd3d_unreachable(); +- } +- +- return word; +-} +- +-struct sm4_register +-{ +- enum vkd3d_sm4_register_type type; +- uint32_t idx[2]; +- unsigned int idx_count; +- enum vkd3d_sm4_dimension dim; +- uint32_t immconst_uint[4]; +- unsigned int mod; +-}; +- +-struct sm4_instruction +-{ +- enum vkd3d_sm4_opcode opcode; +- +- struct sm4_instruction_modifier modifiers[1]; +- unsigned int modifier_count; +- +- struct sm4_dst_register +- { +- struct sm4_register reg; +- unsigned int writemask; +- } dsts[2]; +- unsigned int dst_count; +- +- struct sm4_src_register +- { +- struct sm4_register reg; +- enum vkd3d_sm4_swizzle_type swizzle_type; +- unsigned int swizzle; +- } srcs[4]; +- unsigned int src_count; +- +- uint32_t idx[3]; +- unsigned int idx_count; +-}; +- +-static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, +- unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, +- const struct hlsl_deref *deref, const struct hlsl_type *data_type) +-{ +- const struct hlsl_ir_var *var = deref->var; +- +- if (var->is_uniform) +- { +- if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_TEXTURE) +- { +- reg->type = VKD3D_SM4_RT_RESOURCE; +- reg->dim = VKD3D_SM4_DIMENSION_VEC4; +- if (swizzle_type) +- *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; +- reg->idx_count = 1; +- *writemask = VKD3DSP_WRITEMASK_ALL; +- } +- else if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_UAV) +- { +- reg->type = VKD3D_SM5_RT_UAV; +- reg->dim = VKD3D_SM4_DIMENSION_VEC4; +- if (swizzle_type) +- *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; +- reg->idx_count = 1; +- *writemask = VKD3DSP_WRITEMASK_ALL; +- } +- else if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_SAMPLER) +- { +- reg->type = VKD3D_SM4_RT_SAMPLER; +- reg->dim = VKD3D_SM4_DIMENSION_NONE; +- if (swizzle_type) +- *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; +- reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; +- reg->idx_count = 1; +- *writemask = VKD3DSP_WRITEMASK_ALL; +- } +- else +- { +- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; +- +- assert(data_type->type <= HLSL_CLASS_VECTOR); +- reg->type = VKD3D_SM4_RT_CONSTBUFFER; +- reg->dim = VKD3D_SM4_DIMENSION_VEC4; +- if (swizzle_type) +- *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = var->buffer->reg.id; +- reg->idx[1] = offset / 4; +- reg->idx_count = 2; +- *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); +- } +- } +- else if (var->is_input_semantic) +- { +- bool has_idx; +- +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, ®->type, swizzle_type, &has_idx)) +- { +- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); +- +- if (has_idx) +- { +- reg->idx[0] = var->semantic.index + offset / 4; +- reg->idx_count = 1; +- } +- +- reg->dim = VKD3D_SM4_DIMENSION_VEC4; +- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); +- } +- else +- { +- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); +- +- assert(hlsl_reg.allocated); +- reg->type = VKD3D_SM4_RT_INPUT; +- reg->dim = 
VKD3D_SM4_DIMENSION_VEC4; +- if (swizzle_type) +- *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = hlsl_reg.id; +- reg->idx_count = 1; +- *writemask = hlsl_reg.writemask; +- } +- } +- else if (var->is_output_semantic) +- { +- bool has_idx; +- +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, ®->type, swizzle_type, &has_idx)) +- { +- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); +- +- if (has_idx) +- { +- reg->idx[0] = var->semantic.index + offset / 4; +- reg->idx_count = 1; +- } +- +- if (reg->type == VKD3D_SM4_RT_DEPTHOUT) +- reg->dim = VKD3D_SM4_DIMENSION_SCALAR; +- else +- reg->dim = VKD3D_SM4_DIMENSION_VEC4; +- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); +- } +- else +- { +- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); +- +- assert(hlsl_reg.allocated); +- reg->type = VKD3D_SM4_RT_OUTPUT; +- reg->dim = VKD3D_SM4_DIMENSION_VEC4; +- reg->idx[0] = hlsl_reg.id; +- reg->idx_count = 1; +- *writemask = hlsl_reg.writemask; +- } +- } +- else +- { +- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); +- +- assert(hlsl_reg.allocated); +- reg->type = VKD3D_SM4_RT_TEMP; +- reg->dim = VKD3D_SM4_DIMENSION_VEC4; +- if (swizzle_type) +- *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = hlsl_reg.id; +- reg->idx_count = 1; +- *writemask = hlsl_reg.writemask; +- } +-} +- +-static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, +- const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) +-{ +- unsigned int writemask; +- +- sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); +- if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) +- src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +-} +- +-static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask, +- enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) +-{ +- assert(instr->reg.allocated); +- reg->type = VKD3D_SM4_RT_TEMP; +- reg->dim = VKD3D_SM4_DIMENSION_VEC4; +- *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = instr->reg.id; +- reg->idx_count = 1; +- *writemask = instr->reg.writemask; +-} +- +-static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct hlsl_ir_node *instr) +-{ +- unsigned int swizzle_type; +- +- sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, instr); +-} +- +-static void sm4_src_from_node(struct sm4_src_register *src, +- const struct hlsl_ir_node *instr, unsigned int map_writemask) +-{ +- unsigned int writemask; +- +- sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr); +- if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) +- src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +-} +- +-static uint32_t sm4_encode_register(const struct sm4_register *reg) +-{ +- return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) +- | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) +- | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); +-} +- +-static uint32_t sm4_register_order(const struct sm4_register *reg) +-{ +- uint32_t order = 1; +- if (reg->type == VKD3D_SM4_RT_IMMCONST) +- order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 
4 : 1; +- order += reg->idx_count; +- if (reg->mod) +- ++order; +- return order; +-} +- +-static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) +-{ +- uint32_t token = instr->opcode; +- unsigned int size = 1, i, j; +- +- size += instr->modifier_count; +- for (i = 0; i < instr->dst_count; ++i) +- size += sm4_register_order(&instr->dsts[i].reg); +- for (i = 0; i < instr->src_count; ++i) +- size += sm4_register_order(&instr->srcs[i].reg); +- size += instr->idx_count; +- +- token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); +- +- if (instr->modifier_count > 0) +- token |= VKD3D_SM4_INSTRUCTION_MODIFIER; +- put_u32(buffer, token); +- +- for (i = 0; i < instr->modifier_count; ++i) +- { +- token = sm4_encode_instruction_modifier(&instr->modifiers[i]); +- if (instr->modifier_count > i + 1) +- token |= VKD3D_SM4_INSTRUCTION_MODIFIER; +- put_u32(buffer, token); +- } +- +- for (i = 0; i < instr->dst_count; ++i) +- { +- token = sm4_encode_register(&instr->dsts[i].reg); +- if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) +- token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; +- put_u32(buffer, token); +- +- for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) +- put_u32(buffer, instr->dsts[i].reg.idx[j]); +- } +- +- for (i = 0; i < instr->src_count; ++i) +- { +- token = sm4_encode_register(&instr->srcs[i].reg); +- token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; +- token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; +- if (instr->srcs[i].reg.mod) +- token |= VKD3D_SM4_EXTENDED_OPERAND; +- put_u32(buffer, token); +- +- if (instr->srcs[i].reg.mod) +- put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) +- | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); +- +- for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) +- put_u32(buffer, instr->srcs[i].reg.idx[j]); +- +- if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) +- { +- put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); +- if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) +- { +- put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); +- put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); +- put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); +- } +- } +- } +- +- for (j = 0; j < instr->idx_count; ++j) +- put_u32(buffer, instr->idx[j]); +-} +- +-static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, +- const struct hlsl_ir_node *texel_offset) +-{ +- struct sm4_instruction_modifier modif; +- struct hlsl_ir_constant *offset; +- +- if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) +- return false; +- offset = hlsl_ir_constant(texel_offset); +- +- modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; +- modif.u.aoffimmi.u = offset->value[0].i; +- modif.u.aoffimmi.v = offset->value[1].i; +- modif.u.aoffimmi.w = offset->value[2].i; +- if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 +- || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 +- || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) +- return false; +- +- instr->modifiers[instr->modifier_count++] = modif; +- return true; +-} +- +-static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) +-{ +- const struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, +- +- .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, +- .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, +- .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, +- .srcs[0].reg.idx_count = 2, 
+- .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, +- .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), +- .src_count = 1, +- }; +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_dcl_sampler(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +-{ +- const struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_DCL_SAMPLER, +- +- .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, +- .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id}, +- .dsts[0].reg.idx_count = 1, +- .dst_count = 1, +- }; +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_dcl_texture(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +-{ +- bool uav = (var->data_type->base_type == HLSL_TYPE_UAV); +- struct sm4_instruction instr = +- { +- .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE) +- | (sm4_resource_dimension(var->data_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT), +- +- .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, +- .dsts[0].reg.idx = {uav ? var->regs[HLSL_REGSET_UAVS].id : var->regs[HLSL_REGSET_TEXTURES].id}, +- .dsts[0].reg.idx_count = 1, +- .dst_count = 1, +- +- .idx[0] = sm4_resource_format(var->data_type) * 0x1111, +- .idx_count = 1, +- }; +- +- if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS +- || var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) +- { +- instr.opcode |= var->data_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; +- } +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +-{ +- const struct hlsl_profile_info *profile = ctx->profile; +- const bool output = var->is_output_semantic; +- D3D_NAME usage; +- bool has_idx; +- +- struct sm4_instruction instr = +- { +- .dsts[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, +- .dst_count = 1, +- }; +- +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) +- { +- if (has_idx) +- { +- instr.dsts[0].reg.idx[0] = var->semantic.index; +- instr.dsts[0].reg.idx_count = 1; +- } +- else +- { +- instr.dsts[0].reg.idx_count = 0; +- } +- instr.dsts[0].writemask = (1 << var->data_type->dimx) - 1; +- } +- else +- { +- instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; +- instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; +- instr.dsts[0].reg.idx_count = 1; +- instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; +- } +- +- if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) +- instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; +- +- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); +- if (usage == ~0u) +- usage = D3D_NAME_UNDEFINED; +- +- if (var->is_input_semantic) +- { +- switch (usage) +- { +- case D3D_NAME_UNDEFINED: +- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) +- ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; +- break; +- +- case D3D_NAME_INSTANCE_ID: +- case D3D_NAME_PRIMITIVE_ID: +- case D3D_NAME_VERTEX_ID: +- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) +- ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; +- break; +- +- default: +- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) +- ? 
VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; +- break; +- } +- +- if (profile->type == VKD3D_SHADER_TYPE_PIXEL) +- { +- enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; +- +- if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) +- mode = VKD3DSIM_CONSTANT; +- +- instr.opcode |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; +- } +- } +- else +- { +- if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) +- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; +- else +- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; +- } +- +- switch (usage) +- { +- case D3D_NAME_COVERAGE: +- case D3D_NAME_DEPTH: +- case D3D_NAME_DEPTH_GREATER_EQUAL: +- case D3D_NAME_DEPTH_LESS_EQUAL: +- case D3D_NAME_TARGET: +- case D3D_NAME_UNDEFINED: +- break; +- +- default: +- instr.idx_count = 1; +- instr.idx[0] = usage; +- break; +- } +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_DCL_TEMPS, +- +- .idx = {temp_count}, +- .idx_count = 1, +- }; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, +- +- .idx = {thread_count[0], thread_count[1], thread_count[2]}, +- .idx_count = 3, +- }; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_RET, +- }; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(&instr.srcs[0], src, instr.dsts[0].writemask); +- instr.srcs[0].reg.mod = src_mod; +- instr.src_count = 1; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, +- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, +- const struct hlsl_ir_node *src) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- assert(dst_idx < ARRAY_SIZE(instr.dsts)); +- sm4_dst_from_node(&instr.dsts[dst_idx], dst); +- assert(1 - dst_idx >= 0); +- instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; +- instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; +- instr.dsts[1 - dst_idx].reg.idx_count = 0; +- instr.dst_count = 2; +- +- sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); +- instr.src_count = 1; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); +- 
sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); +- instr.src_count = 2; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-/* dp# instructions don't map the swizzle. */ +-static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(&instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); +- instr.src_count = 2; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, +- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, +- const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- assert(dst_idx < ARRAY_SIZE(instr.dsts)); +- sm4_dst_from_node(&instr.dsts[dst_idx], dst); +- assert(1 - dst_idx >= 0); +- instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; +- instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; +- instr.dsts[1 - dst_idx].reg.idx_count = 0; +- instr.dst_count = 2; +- +- sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[dst_idx].writemask); +- sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); +- instr.src_count = 2; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_constant(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_constant *constant) +-{ +- const unsigned int dimx = constant->node.data_type->dimx; +- struct sm4_instruction instr; +- struct sm4_register *reg = &instr.srcs[0].reg; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_MOV; +- +- sm4_dst_from_node(&instr.dsts[0], &constant->node); +- instr.dst_count = 1; +- +- instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; +- reg->type = VKD3D_SM4_RT_IMMCONST; +- if (dimx == 1) +- { +- reg->dim = VKD3D_SM4_DIMENSION_SCALAR; +- reg->immconst_uint[0] = constant->value[0].u; +- } +- else +- { +- unsigned int i, j = 0; +- +- reg->dim = VKD3D_SM4_DIMENSION_VEC4; +- for (i = 0; i < 4; ++i) +- { +- if (instr.dsts[0].writemask & (1u << i)) +- reg->immconst_uint[i] = constant->value[j++].u; +- } +- } +- instr.src_count = 1, +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, +- const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, +- const struct hlsl_ir_node *texel_offset) +-{ +- bool uav = (resource_type->base_type == HLSL_TYPE_UAV); +- struct sm4_instruction instr; +- unsigned int dim_count; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = uav ? 
VKD3D_SM5_OP_LD_UAV_TYPED : VKD3D_SM4_OP_LD; +- +- if (texel_offset) +- { +- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) +- { +- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, +- "Offset must resolve to integer literal in the range -8 to 7."); +- return; +- } +- } +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); +- +- if (!uav) +- { +- /* Mipmap level is in the last component in the IR, but needs to be in the W +- * component in the instruction. */ +- dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); +- if (dim_count == 1) +- instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4); +- if (dim_count == 2) +- instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, Y, X, Z), 4); +- } +- +- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); +- +- instr.src_count = 2; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, +- const struct hlsl_deref *resource, const struct hlsl_deref *sampler, +- const struct hlsl_ir_node *coords, const struct hlsl_ir_node *texel_offset) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_SAMPLE; +- +- if (texel_offset) +- { +- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) +- { +- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, +- "Offset must resolve to integer literal in the range -8 to 7."); +- return; +- } +- } +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); +- sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); +- instr.src_count = 3; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static bool type_is_float(const struct hlsl_type *type) +-{ +- return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; +-} +- +-static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, +- const struct hlsl_ir_node *arg, uint32_t mask) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_AND; +- +- sm4_dst_from_node(&instr.dsts[0], &expr->node); +- instr.dst_count = 1; +- +- sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); +- instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; +- instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; +- instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; +- instr.srcs[1].reg.immconst_uint[0] = mask; +- instr.src_count = 2; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_cast(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +-{ +- static const union +- { +- uint32_t u; +- float f; +- } one = { .f = 1.0 }; +- const struct hlsl_ir_node *arg1 = expr->operands[0].node; +- const struct hlsl_type *dst_type = expr->node.data_type; +- const struct hlsl_type *src_type = arg1->data_type; +- +- /* Narrowing casts were already lowered. 
*/ +- assert(src_type->dimx == dst_type->dimx); +- +- switch (dst_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- switch (src_type->base_type) +- { +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_UINT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); +- break; +- +- case HLSL_TYPE_DOUBLE: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); +- break; +- +- default: +- vkd3d_unreachable(); +- } +- break; +- +- case HLSL_TYPE_INT: +- switch (src_type->base_type) +- { +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); +- break; +- +- case HLSL_TYPE_DOUBLE: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); +- break; +- +- default: +- vkd3d_unreachable(); +- } +- break; +- +- case HLSL_TYPE_UINT: +- switch (src_type->base_type) +- { +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); +- break; +- +- case HLSL_TYPE_DOUBLE: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); +- break; +- +- default: +- vkd3d_unreachable(); +- } +- break; +- +- case HLSL_TYPE_HALF: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to half."); +- break; +- +- case HLSL_TYPE_DOUBLE: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); +- break; +- +- case HLSL_TYPE_BOOL: +- /* Casts to bool should have already been lowered. 
*/ +- default: +- vkd3d_unreachable(); +- } +-} +- +-static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; +- +- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); +- instr.dst_count = 1; +- +- sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); +- instr.src_count = 2; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_expr(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +-{ +- const struct hlsl_ir_node *arg1 = expr->operands[0].node; +- const struct hlsl_ir_node *arg2 = expr->operands[1].node; +- const struct hlsl_type *dst_type = expr->node.data_type; +- struct vkd3d_string_buffer *dst_type_string; +- +- assert(expr->node.reg.allocated); +- +- if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) +- return; +- +- switch (expr->op) +- { +- case HLSL_OP1_ABS: +- switch (dst_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP1_BIT_NOT: +- assert(type_is_integer(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_CAST: +- write_sm4_cast(ctx, buffer, expr); +- break; +- +- case HLSL_OP1_COS: +- assert(type_is_float(dst_type)); +- write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); +- break; +- +- case HLSL_OP1_EXP2: +- assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_FLOOR: +- assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_FRACT: +- assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_LOG2: +- assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_LOGIC_NOT: +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_NEG: +- switch (dst_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP1_REINTERPRET: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_ROUND: +- assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_RSQ: +- assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); +- break; 
+- +- case HLSL_OP1_SAT: +- assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV +- | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), +- &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_SIN: +- assert(type_is_float(dst_type)); +- write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); +- break; +- +- case HLSL_OP1_SQRT: +- assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP2_ADD: +- switch (dst_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_BIT_AND: +- assert(type_is_integer(dst_type)); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_BIT_OR: +- assert(type_is_integer(dst_type)); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_BIT_XOR: +- assert(type_is_integer(dst_type)); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_DIV: +- switch (dst_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_UINT: +- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_DOT: +- switch (dst_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- switch (arg1->data_type->dimx) +- { +- case 4: +- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); +- break; +- +- case 3: +- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); +- break; +- +- case 2: +- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); +- break; +- +- case 1: +- default: +- vkd3d_unreachable(); +- } +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_EQUAL: +- { +- const struct hlsl_type *src_type = arg1->data_type; +- +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- +- switch (src_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", +- debug_hlsl_type(ctx, src_type)); +- break; +- } +- break; +- } +- +- case HLSL_OP2_GEQUAL: +- { +- const struct hlsl_type *src_type = arg1->data_type; +- +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- +- switch (src_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_UINT: +- 
write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", +- debug_hlsl_type(ctx, src_type)); +- break; +- } +- break; +- } +- +- case HLSL_OP2_LESS: +- { +- const struct hlsl_type *src_type = arg1->data_type; +- +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- +- switch (src_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", +- debug_hlsl_type(ctx, src_type)); +- break; +- } +- break; +- } +- +- case HLSL_OP2_LOGIC_AND: +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_LOGIC_OR: +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_LSHIFT: +- assert(type_is_integer(dst_type)); +- assert(dst_type->base_type != HLSL_TYPE_BOOL); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_MAX: +- switch (dst_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_MIN: +- switch (dst_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_MOD: +- switch (dst_type->base_type) +- { +- case HLSL_TYPE_UINT: +- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_MUL: +- switch (dst_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- /* Using IMUL instead of UMUL because we're taking the low +- * bits, and the native compiler generates IMUL. 
*/ +- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_NEQUAL: +- { +- const struct hlsl_type *src_type = arg1->data_type; +- +- assert(dst_type->base_type == HLSL_TYPE_BOOL); +- +- switch (src_type->base_type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", +- debug_hlsl_type(ctx, src_type)); +- break; +- } +- break; +- } +- +- case HLSL_OP2_RSHIFT: +- assert(type_is_integer(dst_type)); +- assert(dst_type->base_type != HLSL_TYPE_BOOL); +- write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, +- &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); +- } +- +- hlsl_release_string_buffer(ctx, dst_type_string); +-} +- +-static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, +- .src_count = 1, +- }; +- +- assert(iff->condition.node->data_type->dimx == 1); +- +- sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); +- write_sm4_instruction(buffer, &instr); +- +- write_sm4_block(ctx, buffer, &iff->then_instrs); +- +- if (!list_empty(&iff->else_instrs.instrs)) +- { +- instr.opcode = VKD3D_SM4_OP_ELSE; +- instr.src_count = 0; +- write_sm4_instruction(buffer, &instr); +- +- write_sm4_block(ctx, buffer, &iff->else_instrs); +- } +- +- instr.opcode = VKD3D_SM4_OP_ENDIF; +- instr.src_count = 0; +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_jump(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) +-{ +- struct sm4_instruction instr = {0}; +- +- switch (jump->type) +- { +- case HLSL_IR_JUMP_BREAK: +- instr.opcode = VKD3D_SM4_OP_BREAK; +- break; +- +- case HLSL_IR_JUMP_RETURN: +- vkd3d_unreachable(); +- +- default: +- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); +- return; +- } +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_load(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_MOV; +- +- sm4_dst_from_node(&instr.dsts[0], &load->node); +- instr.dst_count = 1; +- +- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, load->node.data_type, instr.dsts[0].writemask); +- instr.src_count = 1; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_loop(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_LOOP, +- }; +- +- write_sm4_instruction(buffer, &instr); +- +- write_sm4_block(ctx, buffer, &loop->body); +- +- instr.opcode = VKD3D_SM4_OP_ENDLOOP; +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_gather(struct hlsl_ctx *ctx, 
struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, +- const struct hlsl_deref *resource, const struct hlsl_deref *sampler, +- const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) +-{ +- struct sm4_src_register *src; +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- +- instr.opcode = VKD3D_SM4_OP_GATHER4; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); +- +- if (texel_offset) +- { +- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) +- { +- if (ctx->profile->major_version < 5) +- { +- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, +- "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); +- return; +- } +- instr.opcode = VKD3D_SM5_OP_GATHER4_PO; +- sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); +- } +- } +- +- sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); +- +- src = &instr.srcs[instr.src_count++]; +- sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); +- src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; +- src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; +- src->swizzle = swizzle; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_resource_load(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) +-{ +- const struct hlsl_type *resource_type = load->resource.var->data_type; +- const struct hlsl_ir_node *texel_offset = load->texel_offset.node; +- const struct hlsl_ir_node *coords = load->coords.node; +- +- if (resource_type->type != HLSL_CLASS_OBJECT) +- { +- assert(resource_type->type == HLSL_CLASS_ARRAY || resource_type->type == HLSL_CLASS_STRUCT); +- hlsl_fixme(ctx, &load->node.loc, "Resource being a component of another variable."); +- return; +- } +- +- if (load->sampler.var) +- { +- const struct hlsl_type *sampler_type = load->sampler.var->data_type; +- +- if (sampler_type->type != HLSL_CLASS_OBJECT) +- { +- assert(sampler_type->type == HLSL_CLASS_ARRAY || sampler_type->type == HLSL_CLASS_STRUCT); +- hlsl_fixme(ctx, &load->node.loc, "Sampler being a component of another variable."); +- return; +- } +- assert(sampler_type->base_type == HLSL_TYPE_SAMPLER); +- assert(sampler_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC); +- +- if (!load->sampler.var->is_uniform) +- { +- hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); +- return; +- } +- } +- +- if (!load->resource.var->is_uniform) +- { +- hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); +- return; +- } +- +- switch (load->load_type) +- { +- case HLSL_RESOURCE_LOAD: +- write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, +- coords, texel_offset); +- break; +- +- case HLSL_RESOURCE_SAMPLE: +- if (!load->sampler.var) +- { +- hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); +- return; +- } +- write_sm4_sample(ctx, buffer, resource_type, &load->node, +- &load->resource, &load->sampler, coords, texel_offset); +- break; +- +- case HLSL_RESOURCE_GATHER_RED: +- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, +- &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); +- break; +- +- case 
HLSL_RESOURCE_GATHER_GREEN: +- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, +- &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); +- break; +- +- case HLSL_RESOURCE_GATHER_BLUE: +- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, +- &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); +- break; +- +- case HLSL_RESOURCE_GATHER_ALPHA: +- write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, +- &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); +- break; +- +- case HLSL_RESOURCE_SAMPLE_LOD: +- hlsl_fixme(ctx, &load->node.loc, "SM4 sample-LOD expression."); +- break; +- } +-} +- +-static void write_sm4_resource_store(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) +-{ +- const struct hlsl_type *resource_type = store->resource.var->data_type; +- +- if (resource_type->type != HLSL_CLASS_OBJECT) +- { +- assert(resource_type->type == HLSL_CLASS_ARRAY || resource_type->type == HLSL_CLASS_STRUCT); +- hlsl_fixme(ctx, &store->node.loc, "Resource being a component of another variable."); +- return; +- } +- +- if (!store->resource.var->is_uniform) +- { +- hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); +- return; +- } +- +- write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); +-} +- +-static void write_sm4_store(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) +-{ +- const struct hlsl_ir_node *rhs = store->rhs.node; +- struct sm4_instruction instr; +- unsigned int writemask; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_MOV; +- +- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); +- instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); +- instr.dst_count = 1; +- +- sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); +- instr.src_count = 1; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_swizzle(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) +-{ +- struct sm4_instruction instr; +- unsigned int writemask; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_MOV; +- +- sm4_dst_from_node(&instr.dsts[0], &swizzle->node); +- instr.dst_count = 1; +- +- sm4_register_from_node(&instr.srcs[0].reg, &writemask, &instr.srcs[0].swizzle_type, swizzle->val.node); +- instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), +- swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); +- instr.src_count = 1; +- +- write_sm4_instruction(buffer, &instr); +-} +- +-static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_block *block) +-{ +- const struct hlsl_ir_node *instr; +- +- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) +- { +- if (instr->data_type) +- { +- if (instr->data_type->type == HLSL_CLASS_MATRIX) +- { +- hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); +- break; +- } +- else if (instr->data_type->type == HLSL_CLASS_OBJECT) +- { +- hlsl_fixme(ctx, &instr->loc, "Object copy."); +- break; +- } +- +- assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR); +- } +- +- switch (instr->type) +- { +- case 
HLSL_IR_CALL: +- vkd3d_unreachable(); +- +- case HLSL_IR_CONSTANT: +- write_sm4_constant(ctx, buffer, hlsl_ir_constant(instr)); +- break; +- +- case HLSL_IR_EXPR: +- write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); +- break; +- +- case HLSL_IR_IF: +- write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); +- break; +- +- case HLSL_IR_JUMP: +- write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); +- break; +- +- case HLSL_IR_LOAD: +- write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); +- break; +- +- case HLSL_IR_RESOURCE_LOAD: +- write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); +- break; +- +- case HLSL_IR_RESOURCE_STORE: +- write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); +- break; +- +- case HLSL_IR_LOOP: +- write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); +- break; +- +- case HLSL_IR_STORE: +- write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); +- break; +- +- case HLSL_IR_SWIZZLE: +- write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); +- break; +- +- default: +- hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); +- } +- } +-} +- +-static void write_sm4_shdr(struct hlsl_ctx *ctx, +- const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) +-{ +- const struct hlsl_profile_info *profile = ctx->profile; +- const struct hlsl_ir_var **extern_resources; +- struct vkd3d_bytecode_buffer buffer = {0}; +- unsigned int extern_resources_count, i; +- const struct hlsl_buffer *cbuffer; +- const struct hlsl_ir_var *var; +- size_t token_count_position; +- +- static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = +- { +- VKD3D_SM4_PS, +- VKD3D_SM4_VS, +- VKD3D_SM4_GS, +- VKD3D_SM5_HS, +- VKD3D_SM5_DS, +- VKD3D_SM5_CS, +- 0, /* EFFECT */ +- 0, /* TEXTURE */ +- VKD3D_SM4_LIB, +- }; +- +- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); +- +- put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); +- token_count_position = put_u32(&buffer, 0); +- +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- if (cbuffer->reg.allocated) +- write_sm4_dcl_constant_buffer(&buffer, cbuffer); +- } +- +- for (i = 0; i < extern_resources_count; ++i) +- { +- var = extern_resources[i]; +- +- if (var->data_type->base_type == HLSL_TYPE_SAMPLER) +- write_sm4_dcl_sampler(&buffer, var); +- else if (var->data_type->base_type == HLSL_TYPE_TEXTURE || var->data_type->base_type == HLSL_TYPE_UAV) +- write_sm4_dcl_texture(&buffer, var); +- } +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) +- write_sm4_dcl_semantic(ctx, &buffer, var); +- } +- +- if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) +- write_sm4_dcl_thread_group(&buffer, ctx->thread_count); +- +- if (ctx->temp_count) +- write_sm4_dcl_temps(&buffer, ctx->temp_count); +- +- write_sm4_block(ctx, &buffer, &entry_func->body); +- +- write_sm4_ret(&buffer); +- +- set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); +- +- dxbc_writer_add_section(dxbc, TAG_SHDR, buffer.data, buffer.size); +- +- vkd3d_free(extern_resources); +-} +- +-int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +-{ +- struct dxbc_writer dxbc; +- size_t i; +- int ret; +- +- dxbc_writer_init(&dxbc); +- +- write_sm4_signature(ctx, &dxbc, false); +- write_sm4_signature(ctx, 
&dxbc, true); +- write_sm4_rdef(ctx, &dxbc); +- write_sm4_shdr(ctx, entry_func, &dxbc); +- +- if (!(ret = ctx->result)) +- ret = dxbc_writer_write(&dxbc, out); +- for (i = 0; i < dxbc.section_count; ++i) +- vkd3d_shader_free_shader_code(&dxbc.sections[i].data); +- return ret; +-} +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +new file mode 100644 +index 00000000000..d542fbb0d52 +--- /dev/null ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -0,0 +1,1073 @@ ++/* ++ * Copyright 2023 Conor McCarthy for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#include "vkd3d_shader_private.h" ++ ++static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) ++{ ++ return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; ++} ++ ++static bool shader_instruction_is_dcl(const struct vkd3d_shader_instruction *ins) ++{ ++ return (VKD3DSIH_DCL <= ins->handler_idx && ins->handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) ++ || ins->handler_idx == VKD3DSIH_HS_DECLS; ++} ++ ++static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) ++{ ++ ins->handler_idx = VKD3DSIH_NOP; ++ ins->dst_count = 0; ++ ins->src_count = 0; ++ ins->dst = NULL; ++ ins->src = NULL; ++} ++ ++static void shader_register_eliminate_phase_addressing(struct vkd3d_shader_register *reg, ++ unsigned int instance_id) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < reg->idx_count; ++i) ++ { ++ if (reg->idx[i].rel_addr && shader_register_is_phase_instance_id(®->idx[i].rel_addr->reg)) ++ { ++ reg->idx[i].rel_addr = NULL; ++ reg->idx[i].offset += instance_id; ++ } ++ } ++} ++ ++static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_instruction *ins, ++ unsigned int instance_id) ++{ ++ struct vkd3d_shader_register *reg; ++ unsigned int i; ++ ++ for (i = 0; i < ins->src_count; ++i) ++ { ++ reg = (struct vkd3d_shader_register *)&ins->src[i].reg; ++ if (shader_register_is_phase_instance_id(reg)) ++ { ++ reg->type = VKD3DSPR_IMMCONST; ++ reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; ++ reg->non_uniform = false; ++ reg->idx[0].offset = ~0u; ++ reg->idx[0].rel_addr = NULL; ++ reg->idx[1].offset = ~0u; ++ reg->idx[1].rel_addr = NULL; ++ reg->idx[2].offset = ~0u; ++ reg->idx[2].rel_addr = NULL; ++ reg->idx_count = 0; ++ reg->immconst_type = VKD3D_IMMCONST_SCALAR; ++ reg->u.immconst_uint[0] = instance_id; ++ continue; ++ } ++ shader_register_eliminate_phase_addressing(reg, instance_id); ++ } ++ ++ for (i = 0; i < ins->dst_count; ++i) ++ shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); ++} ++ ++struct hull_flattener ++{ ++ struct vkd3d_shader_instruction_array instructions; ++ ++ unsigned int max_temp_count; ++ unsigned int temp_dcl_idx; ++ 
++ unsigned int instance_count; ++ unsigned int phase_body_idx; ++ enum vkd3d_shader_opcode phase; ++}; ++ ++static bool flattener_is_in_fork_or_join_phase(const struct hull_flattener *flattener) ++{ ++ return flattener->phase == VKD3DSIH_HS_FORK_PHASE || flattener->phase == VKD3DSIH_HS_JOIN_PHASE; ++} ++ ++struct shader_phase_location ++{ ++ unsigned int index; ++ unsigned int instance_count; ++ unsigned int instruction_count; ++}; ++ ++struct shader_phase_location_array ++{ ++ /* Unlikely worst case: one phase for each component of each output register. */ ++ struct shader_phase_location locations[MAX_REG_OUTPUT * VKD3D_VEC4_SIZE]; ++ unsigned int count; ++}; ++ ++static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normaliser, ++ unsigned int index, struct shader_phase_location_array *locations) ++{ ++ struct vkd3d_shader_instruction *ins = &normaliser->instructions.elements[index]; ++ struct shader_phase_location *loc; ++ bool b; ++ ++ if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) ++ { ++ b = flattener_is_in_fork_or_join_phase(normaliser); ++ /* Reset the phase info. */ ++ normaliser->phase_body_idx = ~0u; ++ normaliser->phase = ins->handler_idx; ++ normaliser->instance_count = 1; ++ /* Leave the first occurrence and delete the rest. */ ++ if (b) ++ vkd3d_shader_instruction_make_nop(ins); ++ return; ++ } ++ else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT ++ || ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) ++ { ++ normaliser->instance_count = ins->declaration.count + !ins->declaration.count; ++ vkd3d_shader_instruction_make_nop(ins); ++ return; ++ } ++ else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( ++ &ins->declaration.dst.reg)) ++ { ++ vkd3d_shader_instruction_make_nop(ins); ++ return; ++ } ++ else if (ins->handler_idx == VKD3DSIH_DCL_TEMPS && normaliser->phase != VKD3DSIH_INVALID) ++ { ++ /* Leave only the first temp declaration and set it to the max count later. 
*/ ++ if (!normaliser->max_temp_count) ++ normaliser->temp_dcl_idx = index; ++ else ++ vkd3d_shader_instruction_make_nop(ins); ++ normaliser->max_temp_count = max(normaliser->max_temp_count, ins->declaration.count); ++ return; ++ } ++ ++ if (normaliser->phase == VKD3DSIH_INVALID || shader_instruction_is_dcl(ins)) ++ return; ++ ++ if (normaliser->phase_body_idx == ~0u) ++ normaliser->phase_body_idx = index; ++ ++ if (ins->handler_idx == VKD3DSIH_RET) ++ { ++ vkd3d_shader_instruction_make_nop(ins); ++ if (locations->count >= ARRAY_SIZE(locations->locations)) ++ { ++ FIXME("Insufficient space for phase location.\n"); ++ return; ++ } ++ loc = &locations->locations[locations->count++]; ++ loc->index = normaliser->phase_body_idx; ++ loc->instance_count = normaliser->instance_count; ++ loc->instruction_count = index - normaliser->phase_body_idx; ++ } ++} ++ ++static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normaliser, ++ struct shader_phase_location_array *locations) ++{ ++ struct shader_phase_location *loc; ++ unsigned int i, j, k, end, count; ++ ++ for (i = 0, count = 0; i < locations->count; ++i) ++ count += (locations->locations[i].instance_count - 1) * locations->locations[i].instruction_count; ++ ++ if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ end = normaliser->instructions.count; ++ normaliser->instructions.count += count; ++ ++ for (i = locations->count; i > 0; --i) ++ { ++ loc = &locations->locations[i - 1]; ++ j = loc->index + loc->instruction_count; ++ memmove(&normaliser->instructions.elements[j + count], &normaliser->instructions.elements[j], ++ (end - j) * sizeof(*normaliser->instructions.elements)); ++ end = j; ++ count -= (loc->instance_count - 1) * loc->instruction_count; ++ loc->index += count; ++ } ++ ++ for (i = 0, count = 0; i < locations->count; ++i) ++ { ++ loc = &locations->locations[i]; ++ /* Make a copy of the non-dcl instructions for each instance. */ ++ for (j = 1; j < loc->instance_count; ++j) ++ { ++ for (k = 0; k < loc->instruction_count; ++k) ++ { ++ if (!shader_instruction_array_clone_instruction(&normaliser->instructions, ++ loc->index + loc->instruction_count * j + k, loc->index + k)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ } ++ /* Replace each reference to the instance id with a constant instance id. 
*/ ++ for (j = 0; j < loc->instance_count; ++j) ++ { ++ for (k = 0; k < loc->instruction_count; ++k) ++ shader_instruction_eliminate_phase_instance_id( ++ &normaliser->instructions.elements[loc->index + loc->instruction_count * j + k], j); ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ ++static void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, ++ enum vkd3d_data_type data_type, unsigned int idx_count) ++{ ++ reg->type = reg_type; ++ reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; ++ reg->non_uniform = false; ++ reg->data_type = data_type; ++ reg->idx[0].offset = ~0u; ++ reg->idx[0].rel_addr = NULL; ++ reg->idx[1].offset = ~0u; ++ reg->idx[1].rel_addr = NULL; ++ reg->idx[2].offset = ~0u; ++ reg->idx[2].rel_addr = NULL; ++ reg->idx_count = idx_count; ++ reg->immconst_type = VKD3D_IMMCONST_SCALAR; ++} ++ ++static void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) ++{ ++ memset(ins, 0, sizeof(*ins)); ++ ins->handler_idx = handler_idx; ++} ++ ++enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) ++{ ++ struct hull_flattener flattener = {*src_instructions}; ++ struct vkd3d_shader_instruction_array *instructions; ++ struct shader_phase_location_array locations; ++ enum vkd3d_result result = VKD3D_OK; ++ unsigned int i; ++ ++ instructions = &flattener.instructions; ++ ++ flattener.phase = VKD3DSIH_INVALID; ++ for (i = 0, locations.count = 0; i < instructions->count; ++i) ++ flattener_eliminate_phase_related_dcls(&flattener, i, &locations); ++ ++ if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) ++ return result; ++ ++ if (flattener.phase != VKD3DSIH_INVALID) ++ { ++ if (flattener.temp_dcl_idx) ++ instructions->elements[flattener.temp_dcl_idx].declaration.count = flattener.max_temp_count; ++ ++ if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ shader_instruction_init(&instructions->elements[instructions->count++], VKD3DSIH_RET); ++ } ++ ++ *src_instructions = flattener.instructions; ++ return result; ++} ++ ++struct control_point_normaliser ++{ ++ struct vkd3d_shader_instruction_array instructions; ++ enum vkd3d_shader_opcode phase; ++ struct vkd3d_shader_src_param *outpointid_param; ++}; ++ ++static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser) ++{ ++ return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; ++} ++ ++static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( ++ struct vkd3d_shader_instruction_array *instructions) ++{ ++ struct vkd3d_shader_src_param *rel_addr; ++ ++ if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) ++ return NULL; ++ ++ shader_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0); ++ rel_addr->swizzle = 0; ++ rel_addr->modifiers = 0; ++ ++ return rel_addr; ++} ++ ++static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param, ++ struct control_point_normaliser *normaliser) ++{ ++ struct vkd3d_shader_register *reg = &dst_param->reg; ++ ++ if (control_point_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) ++ { ++ /* The TPF reader validates idx_count. */ ++ assert(reg->idx_count == 1); ++ reg->idx[1] = reg->idx[0]; ++ /* The control point id param is implicit here. 
Avoid later complications by inserting it. */
++        reg->idx[0].offset = 0;
++        reg->idx[0].rel_addr = normaliser->outpointid_param;
++        ++reg->idx_count;
++    }
++}
++
++static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e,
++        enum vkd3d_shader_register_type reg_type, unsigned int idx_count)
++{
++    param->write_mask = e->mask;
++    param->modifiers = 0;
++    param->shift = 0;
++    shader_register_init(&param->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count);
++}
++
++static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser,
++        const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst)
++{
++    struct vkd3d_shader_instruction *ins;
++    struct vkd3d_shader_dst_param *param;
++    const struct signature_element *e;
++    unsigned int i, count;
++
++    for (i = 0, count = 1; i < s->element_count; ++i)
++        count += !!s->elements[i].used_mask;
++
++    if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count))
++        return VKD3D_ERROR_OUT_OF_MEMORY;
++
++    memmove(&normaliser->instructions.elements[dst + count], &normaliser->instructions.elements[dst],
++            (normaliser->instructions.count - dst) * sizeof(*normaliser->instructions.elements));
++    normaliser->instructions.count += count;
++
++    ins = &normaliser->instructions.elements[dst];
++    shader_instruction_init(ins, VKD3DSIH_HS_CONTROL_POINT_PHASE);
++    ins->flags = 1;
++    ++ins;
++
++    for (i = 0; i < s->element_count; ++i)
++    {
++        e = &s->elements[i];
++        if (!e->used_mask)
++            continue;
++
++        if (e->sysval_semantic != VKD3D_SHADER_SV_NONE)
++        {
++            shader_instruction_init(ins, VKD3DSIH_DCL_INPUT_SIV);
++            param = &ins->declaration.register_semantic.reg;
++            ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic);
++        }
++        else
++        {
++            shader_instruction_init(ins, VKD3DSIH_DCL_INPUT);
++            param = &ins->declaration.dst;
++        }
++
++        shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2);
++        param->reg.idx[0].offset = input_control_point_count;
++        param->reg.idx[1].offset = i;
++
++        ++ins;
++    }
++
++    return VKD3D_OK;
++}
++
++enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io(
++        struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature)
++{
++    struct vkd3d_shader_instruction_array *instructions;
++    struct control_point_normaliser normaliser;
++    unsigned int input_control_point_count;
++    struct vkd3d_shader_instruction *ins;
++    enum vkd3d_result ret;
++    unsigned int i, j;
++
++    if (!(normaliser.outpointid_param = instruction_array_create_outpointid_param(src_instructions)))
++    {
++        ERR("Failed to allocate src param.\n");
++        return VKD3D_ERROR_OUT_OF_MEMORY;
++    }
++    normaliser.instructions = *src_instructions;
++    instructions = &normaliser.instructions;
++    normaliser.phase = VKD3DSIH_INVALID;
++
++    for (i = 0; i < normaliser.instructions.count; ++i)
++    {
++        ins = &instructions->elements[i];
++
++        switch (ins->handler_idx)
++        {
++            case VKD3DSIH_HS_CONTROL_POINT_PHASE:
++            case VKD3DSIH_HS_FORK_PHASE:
++            case VKD3DSIH_HS_JOIN_PHASE:
++                normaliser.phase = ins->handler_idx;
++                break;
++            default:
++                if (shader_instruction_is_dcl(ins))
++                    break;
++                for (j = 0; j < ins->dst_count; ++j)
++                    shader_dst_param_normalise_outpointid((struct vkd3d_shader_dst_param *)&ins->dst[j], &normaliser);
++                break;
++        }
++    }
++
++    normaliser.phase = VKD3DSIH_INVALID;
++    input_control_point_count = 
1; ++ ++ for (i = 0; i < instructions->count; ++i) ++ { ++ ins = &instructions->elements[i]; ++ ++ switch (ins->handler_idx) ++ { ++ case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: ++ input_control_point_count = ins->declaration.count; ++ break; ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: ++ *src_instructions = normaliser.instructions; ++ return VKD3D_OK; ++ case VKD3DSIH_HS_FORK_PHASE: ++ case VKD3DSIH_HS_JOIN_PHASE: ++ ret = control_point_normaliser_emit_hs_input(&normaliser, input_signature, ++ input_control_point_count, i); ++ *src_instructions = normaliser.instructions; ++ return ret; ++ default: ++ break; ++ } ++ } ++ ++ *src_instructions = normaliser.instructions; ++ return VKD3D_OK; ++} ++ ++struct io_normaliser ++{ ++ struct vkd3d_shader_instruction_array instructions; ++ enum vkd3d_shader_type shader_type; ++ struct shader_signature *input_signature; ++ struct shader_signature *output_signature; ++ struct shader_signature *patch_constant_signature; ++ ++ unsigned int max_temp_count; ++ unsigned int temp_dcl_idx; ++ ++ unsigned int instance_count; ++ unsigned int phase_body_idx; ++ enum vkd3d_shader_opcode phase; ++ unsigned int output_control_point_count; ++ ++ struct vkd3d_shader_src_param *outpointid_param; ++ ++ struct vkd3d_shader_dst_param *input_dcl_params[MAX_REG_OUTPUT]; ++ struct vkd3d_shader_dst_param *output_dcl_params[MAX_REG_OUTPUT]; ++ struct vkd3d_shader_dst_param *pc_dcl_params[MAX_REG_OUTPUT]; ++ uint8_t input_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; ++ uint8_t output_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; ++ uint8_t pc_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; ++}; ++ ++static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *normaliser) ++{ ++ return normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE; ++} ++ ++static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser *normaliser) ++{ ++ return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; ++} ++ ++static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature, ++ unsigned int reg_idx, unsigned int write_mask) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ struct signature_element *e = &signature->elements[i]; ++ if (e->register_index <= reg_idx && e->register_index + e->register_count > reg_idx ++ && (e->mask & write_mask) == write_mask) ++ { ++ return i; ++ } ++ } ++ ++ /* Validated in the TPF reader. */ ++ vkd3d_unreachable(); ++} ++ ++static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE], ++ unsigned int register_idx, unsigned int write_mask) ++{ ++ return range_map[register_idx][vkd3d_write_mask_get_component_idx(write_mask)]; ++} ++ ++static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], unsigned int register_idx, ++ unsigned int register_count, unsigned int write_mask, bool is_dcl_indexrange) ++{ ++ unsigned int i, j, r, c, component_idx, component_count; ++ ++ assert(write_mask <= VKD3DSP_WRITEMASK_ALL); ++ component_idx = vkd3d_write_mask_get_component_idx(write_mask); ++ component_count = vkd3d_write_mask_component_count(write_mask); ++ ++ assert(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); ++ ++ if (range_map[register_idx][component_idx] > register_count && is_dcl_indexrange) ++ { ++ /* Validated in the TPF reader. 
*/ ++ assert(range_map[register_idx][component_idx] != UINT8_MAX); ++ return; ++ } ++ if (range_map[register_idx][component_idx] == register_count) ++ { ++ /* Already done. This happens when fxc splits a register declaration by ++ * component(s). The dcl_indexrange instructions are split too. */ ++ return; ++ } ++ range_map[register_idx][component_idx] = register_count; ++ ++ for (i = 0; i < register_count; ++i) ++ { ++ r = register_idx + i; ++ for (j = !i; j < component_count; ++j) ++ { ++ c = component_idx + j; ++ /* A synthetic patch constant range which overlaps an existing range can start upstream of it ++ * for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap. ++ * The latter is validated in the TPF reader. */ ++ assert(!range_map[r][c] || !is_dcl_indexrange); ++ range_map[r][c] = UINT8_MAX; ++ } ++ } ++} ++ ++static void io_normaliser_add_index_range(struct io_normaliser *normaliser, ++ const struct vkd3d_shader_instruction *ins) ++{ ++ const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; ++ const struct vkd3d_shader_register *reg = &range->dst.reg; ++ unsigned int reg_idx, write_mask, element_idx; ++ const struct shader_signature *signature; ++ uint8_t (*range_map)[VKD3D_VEC4_SIZE]; ++ ++ switch (reg->type) ++ { ++ case VKD3DSPR_INPUT: ++ case VKD3DSPR_INCONTROLPOINT: ++ range_map = normaliser->input_range_map; ++ signature = normaliser->input_signature; ++ break; ++ case VKD3DSPR_OUTCONTROLPOINT: ++ range_map = normaliser->output_range_map; ++ signature = normaliser->output_signature; ++ break; ++ case VKD3DSPR_OUTPUT: ++ if (!io_normaliser_is_in_fork_or_join_phase(normaliser)) ++ { ++ range_map = normaliser->output_range_map; ++ signature = normaliser->output_signature; ++ break; ++ } ++ /* fall through */ ++ case VKD3DSPR_PATCHCONST: ++ range_map = normaliser->pc_range_map; ++ signature = normaliser->patch_constant_signature; ++ break; ++ default: ++ /* Validated in the TPF reader. */ ++ vkd3d_unreachable(); ++ } ++ ++ reg_idx = reg->idx[reg->idx_count - 1].offset; ++ write_mask = range->dst.write_mask; ++ element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); ++ range_map_set_register_range(range_map, reg_idx, range->register_count, ++ signature->elements[element_idx].mask, true); ++} ++ ++static int signature_element_mask_compare(const void *a, const void *b) ++{ ++ const struct signature_element *e = a, *f = b; ++ int ret; ++ ++ return (ret = vkd3d_u32_compare(e->mask, f->mask)) ? ret : vkd3d_u32_compare(e->register_index, f->register_index); ++} ++ ++static bool sysval_semantics_should_merge(const struct signature_element *e, const struct signature_element *f) ++{ ++ if (e->sysval_semantic < VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE ++ || e->sysval_semantic > VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) ++ return false; ++ ++ return e->sysval_semantic == f->sysval_semantic ++ /* Line detail and density must be merged together to match the SPIR-V array. ++ * This deletes one of the two sysvals, but these are not used. */ ++ || (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET ++ && f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) ++ || (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN ++ && f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET); ++} ++ ++/* Merge tess factor sysvals because they are an array in SPIR-V. 
*/ ++static void shader_signature_map_patch_constant_index_ranges(struct shader_signature *s, ++ uint8_t range_map[][VKD3D_VEC4_SIZE]) ++{ ++ struct signature_element *e, *f; ++ unsigned int i, j, register_count; ++ ++ qsort(s->elements, s->element_count, sizeof(s->elements[0]), signature_element_mask_compare); ++ ++ for (i = 0; i < s->element_count; i += register_count) ++ { ++ e = &s->elements[i]; ++ register_count = 1; ++ ++ if (!e->sysval_semantic) ++ continue; ++ ++ for (j = i + 1; j < s->element_count; ++j, ++register_count) ++ { ++ f = &s->elements[j]; ++ if (f->register_index != e->register_index + register_count || !sysval_semantics_should_merge(e, f)) ++ break; ++ } ++ if (register_count < 2) ++ continue; ++ ++ range_map_set_register_range(range_map, e->register_index, register_count, e->mask, false); ++ } ++} ++ ++static int signature_element_register_compare(const void *a, const void *b) ++{ ++ const struct signature_element *e = a, *f = b; ++ ++ return vkd3d_u32_compare(e->register_index, f->register_index); ++} ++ ++static int signature_element_index_compare(const void *a, const void *b) ++{ ++ const struct signature_element *e = a, *f = b; ++ ++ return vkd3d_u32_compare(e->sort_index, f->sort_index); ++} ++ ++static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map[][VKD3D_VEC4_SIZE], ++ bool is_patch_constant) ++{ ++ unsigned int i, j, element_count, new_count, register_count; ++ struct signature_element *elements; ++ struct signature_element *e, *f; ++ ++ element_count = s->element_count; ++ if (!(elements = vkd3d_malloc(element_count * sizeof(*elements)))) ++ return false; ++ memcpy(elements, s->elements, element_count * sizeof(*elements)); ++ ++ qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare); ++ ++ for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e) ++ { ++ e = &elements[i]; ++ j = i + 1; ++ ++ if (e->register_index == ~0u) ++ continue; ++ ++ /* Do not merge if the register index will be relative-addressed. */ ++ if (range_map_get_register_count(range_map, e->register_index, e->mask) > 1) ++ continue; ++ ++ for (; j < element_count; ++j) ++ { ++ f = &elements[j]; ++ ++ /* Merge different components of the same register unless sysvals are different, ++ * or it will be relative-addressed. */ ++ if (f->register_index != e->register_index || f->sysval_semantic != e->sysval_semantic ++ || range_map_get_register_count(range_map, f->register_index, f->mask) > 1) ++ break; ++ ++ TRACE("Merging %s, reg %u, mask %#x, sysval %#x with %s, mask %#x, sysval %#x.\n", e->semantic_name, ++ e->register_index, e->mask, e->sysval_semantic, f->semantic_name, f->mask, f->sysval_semantic); ++ assert(!(e->mask & f->mask)); ++ ++ e->mask |= f->mask; ++ e->used_mask |= f->used_mask; ++ e->semantic_index = min(e->semantic_index, f->semantic_index); ++ } ++ } ++ element_count = new_count; ++ /* Signature 's' is a copy of the original signature struct, so we can replace ++ * the 'elements' pointer without freeing it. 
*/
++    s->elements = elements;
++    s->element_count = element_count;
++
++    if (is_patch_constant)
++        shader_signature_map_patch_constant_index_ranges(s, range_map);
++
++    for (i = 0, new_count = 0; i < element_count; i += register_count, elements[new_count++] = *e)
++    {
++        e = &elements[i];
++        register_count = 1;
++
++        if (e->register_index >= MAX_REG_OUTPUT)
++            continue;
++
++        register_count = range_map_get_register_count(range_map, e->register_index, e->mask);
++        assert(register_count != UINT8_MAX);
++        register_count += !register_count;
++
++        if (register_count > 1)
++        {
++            TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count);
++            e->register_count = register_count;
++        }
++    }
++    element_count = new_count;
++
++    /* Restoring the original order is required for sensible trace output. */
++    qsort(elements, element_count, sizeof(elements[0]), signature_element_index_compare);
++
++    s->element_count = element_count;
++
++    return true;
++}
++
++static bool sysval_semantic_is_tess_factor(enum vkd3d_shader_sysval_semantic sysval_semantic)
++{
++    return sysval_semantic >= VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE
++            && sysval_semantic <= VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN;
++}
++
++static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg,
++        unsigned int id_idx, unsigned int register_index)
++{
++    assert(id_idx < ARRAY_SIZE(reg->idx) - 1);
++
++    /* For a relative-addressed register index, move the id up a slot to separate it from the address,
++     * because rel_addr can be replaced with a constant offset in some cases. */
++    if (reg->idx[id_idx].rel_addr)
++    {
++        reg->idx[id_idx + 1].rel_addr = NULL;
++        reg->idx[id_idx + 1].offset = reg->idx[id_idx].offset;
++        reg->idx[id_idx].offset -= register_index;
++        ++id_idx;
++    }
++    /* Otherwise we have no address for the arrayed register, so insert one. This happens e.g. where
++     * tessellation level registers are merged into an array because they're an array in SPIR-V. */
++    else
++    {
++        ++id_idx;
++        memmove(&reg->idx[1], &reg->idx[0], id_idx * sizeof(reg->idx[0]));
++        reg->idx[0].rel_addr = NULL;
++        reg->idx[0].offset = reg->idx[id_idx].offset - register_index;
++    }
++
++    return id_idx;
++}
++
++static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, bool is_io_dcl,
++        struct io_normaliser *normaliser)
++ {
++    unsigned int id_idx, reg_idx, write_mask, element_idx;
++    struct vkd3d_shader_register *reg = &dst_param->reg;
++    struct vkd3d_shader_dst_param **dcl_params;
++    const struct shader_signature *signature;
++    const struct signature_element *e;
++
++    if ((reg->type == VKD3DSPR_OUTPUT && io_normaliser_is_in_fork_or_join_phase(normaliser))
++            || reg->type == VKD3DSPR_PATCHCONST)
++    {
++        signature = normaliser->patch_constant_signature;
++        /* Convert patch constant outputs to the patch constant register type to avoid the need
++         * to convert compiler symbols when accessed as inputs in a later stage. 
*/ ++ reg->type = VKD3DSPR_PATCHCONST; ++ dcl_params = normaliser->pc_dcl_params; ++ } ++ else if (reg->type == VKD3DSPR_OUTPUT || dst_param->reg.type == VKD3DSPR_COLOROUT) ++ { ++ signature = normaliser->output_signature; ++ dcl_params = normaliser->output_dcl_params; ++ } ++ else if (dst_param->reg.type == VKD3DSPR_INCONTROLPOINT || dst_param->reg.type == VKD3DSPR_INPUT) ++ { ++ signature = normaliser->input_signature; ++ dcl_params = normaliser->input_dcl_params; ++ } ++ else ++ { ++ return true; ++ } ++ ++ id_idx = reg->idx_count - 1; ++ reg_idx = reg->idx[id_idx].offset; ++ write_mask = dst_param->write_mask; ++ element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); ++ e = &signature->elements[element_idx]; ++ ++ dst_param->write_mask >>= vkd3d_write_mask_get_component_idx(e->mask); ++ if (is_io_dcl) ++ { ++ /* Validated in the TPF reader. */ ++ assert(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); ++ ++ if (dcl_params[element_idx]) ++ { ++ /* Merge split declarations into a single one. */ ++ dcl_params[element_idx]->write_mask |= dst_param->write_mask; ++ /* Turn this into a nop. */ ++ return false; ++ } ++ else ++ { ++ dcl_params[element_idx] = dst_param; ++ } ++ } ++ ++ if (io_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) ++ { ++ if (is_io_dcl) ++ { ++ /* Emit an array size for the control points for consistency with inputs. */ ++ reg->idx[0].offset = normaliser->output_control_point_count; ++ } ++ else ++ { ++ /* The control point id param. */ ++ assert(reg->idx[0].rel_addr); ++ } ++ id_idx = 1; ++ } ++ ++ if ((e->register_count > 1 || sysval_semantic_is_tess_factor(e->sysval_semantic))) ++ { ++ if (is_io_dcl) ++ { ++ /* For control point I/O, idx 0 contains the control point count. ++ * Ensure it is moved up to the next slot. */ ++ reg->idx[id_idx].offset = reg->idx[0].offset; ++ reg->idx[0].offset = e->register_count; ++ ++id_idx; ++ } ++ else ++ { ++ id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); ++ } ++ } ++ ++ /* Replace the register index with the signature element index */ ++ reg->idx[id_idx].offset = element_idx; ++ reg->idx_count = id_idx + 1; ++ ++ return true; ++} ++ ++static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_param, ++ struct io_normaliser *normaliser) ++{ ++ unsigned int i, id_idx, reg_idx, write_mask, element_idx, component_idx; ++ struct vkd3d_shader_register *reg = &src_param->reg; ++ const struct shader_signature *signature; ++ const struct signature_element *e; ++ ++ /* Input/output registers from one phase can be used as inputs in ++ * subsequent phases. Specifically: ++ * ++ * - Control phase inputs are available as "vicp" in fork and join ++ * phases. ++ * - Control phase outputs are available as "vocp" in fork and join ++ * phases. ++ * - Fork phase patch constants are available as "vpc" in join ++ * phases. ++ * ++ * We handle "vicp" here by converting INCONTROLPOINT src registers to ++ * type INPUT so they match the control phase declarations. We handle ++ * "vocp" by converting OUTCONTROLPOINT registers to type OUTPUT. ++ * Merging fork and join phases handles "vpc". 
*/ ++ ++ switch (reg->type) ++ { ++ case VKD3DSPR_PATCHCONST: ++ signature = normaliser->patch_constant_signature; ++ break; ++ case VKD3DSPR_INCONTROLPOINT: ++ if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) ++ reg->type = VKD3DSPR_INPUT; ++ /* fall through */ ++ case VKD3DSPR_INPUT: ++ signature = normaliser->input_signature; ++ break; ++ case VKD3DSPR_OUTCONTROLPOINT: ++ if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) ++ reg->type = VKD3DSPR_OUTPUT; ++ /* fall through */ ++ case VKD3DSPR_OUTPUT: ++ signature = normaliser->output_signature; ++ break; ++ default: ++ return; ++ } ++ ++ id_idx = reg->idx_count - 1; ++ reg_idx = reg->idx[id_idx].offset; ++ write_mask = VKD3DSP_WRITEMASK_0 << vkd3d_swizzle_get_component(src_param->swizzle, 0); ++ element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); ++ ++ e = &signature->elements[element_idx]; ++ if ((e->register_count > 1 || sysval_semantic_is_tess_factor(e->sysval_semantic))) ++ id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); ++ reg->idx[id_idx].offset = element_idx; ++ reg->idx_count = id_idx + 1; ++ ++ if ((component_idx = vkd3d_write_mask_get_component_idx(e->mask))) ++ { ++ for (i = 0; i < VKD3D_VEC4_SIZE; ++i) ++ if (vkd3d_swizzle_get_component(src_param->swizzle, i)) ++ src_param->swizzle -= component_idx << VKD3D_SHADER_SWIZZLE_SHIFT(i); ++ } ++} ++ ++static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins, ++ struct io_normaliser *normaliser) ++{ ++ struct vkd3d_shader_register *reg; ++ bool keep = true; ++ unsigned int i; ++ ++ switch (ins->handler_idx) ++ { ++ case VKD3DSIH_DCL_INPUT: ++ if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) ++ { ++ reg = &ins->declaration.dst.reg; ++ /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their ++ * equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. 
*/ ++ if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST) ++ vkd3d_shader_instruction_make_nop(ins); ++ else if (reg->type == VKD3DSPR_INCONTROLPOINT) ++ reg->type = VKD3DSPR_INPUT; ++ } ++ /* fall through */ ++ case VKD3DSIH_DCL_INPUT_PS: ++ case VKD3DSIH_DCL_OUTPUT: ++ keep = shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser); ++ break; ++ case VKD3DSIH_DCL_INPUT_SGV: ++ case VKD3DSIH_DCL_INPUT_SIV: ++ case VKD3DSIH_DCL_INPUT_PS_SGV: ++ case VKD3DSIH_DCL_INPUT_PS_SIV: ++ case VKD3DSIH_DCL_OUTPUT_SIV: ++ keep = shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, ++ normaliser); ++ break; ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: ++ case VKD3DSIH_HS_FORK_PHASE: ++ case VKD3DSIH_HS_JOIN_PHASE: ++ normaliser->phase = ins->handler_idx; ++ memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params)); ++ memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params)); ++ memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params)); ++ break; ++ default: ++ if (shader_instruction_is_dcl(ins)) ++ break; ++ for (i = 0; i < ins->dst_count; ++i) ++ shader_dst_param_io_normalise((struct vkd3d_shader_dst_param *)&ins->dst[i], false, normaliser); ++ for (i = 0; i < ins->src_count; ++i) ++ shader_src_param_io_normalise((struct vkd3d_shader_src_param *)&ins->src[i], normaliser); ++ break; ++ } ++ ++ if (!keep) ++ shader_instruction_init(ins, VKD3DSIH_NOP); ++} ++ ++enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, ++ enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, ++ struct shader_signature *output_signature, struct shader_signature *patch_constant_signature) ++{ ++ struct io_normaliser normaliser = {*instructions}; ++ struct vkd3d_shader_instruction *ins; ++ bool has_control_point_phase; ++ unsigned int i, j; ++ ++ normaliser.phase = VKD3DSIH_INVALID; ++ normaliser.shader_type = shader_type; ++ normaliser.input_signature = input_signature; ++ normaliser.output_signature = output_signature; ++ normaliser.patch_constant_signature = patch_constant_signature; ++ ++ for (i = 0, has_control_point_phase = false; i < instructions->count; ++i) ++ { ++ ins = &instructions->elements[i]; ++ ++ switch (ins->handler_idx) ++ { ++ case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: ++ normaliser.output_control_point_count = ins->declaration.count; ++ break; ++ case VKD3DSIH_DCL_INDEX_RANGE: ++ io_normaliser_add_index_range(&normaliser, ins); ++ vkd3d_shader_instruction_make_nop(ins); ++ break; ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: ++ has_control_point_phase = true; ++ /* fall through */ ++ case VKD3DSIH_HS_FORK_PHASE: ++ case VKD3DSIH_HS_JOIN_PHASE: ++ normaliser.phase = ins->handler_idx; ++ break; ++ default: ++ break; ++ } ++ } ++ ++ if (normaliser.shader_type == VKD3D_SHADER_TYPE_HULL && !has_control_point_phase) ++ { ++ /* Inputs and outputs must match for the default phase, so merge ranges must match too. 
*/ ++ for (i = 0; i < MAX_REG_OUTPUT; ++i) ++ { ++ for (j = 0; j < VKD3D_VEC4_SIZE; ++j) ++ { ++ if (!normaliser.input_range_map[i][j] && normaliser.output_range_map[i][j]) ++ normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j]; ++ else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j]) ++ normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j]; ++ else assert(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); ++ } ++ } ++ } ++ ++ if (!shader_signature_merge(input_signature, normaliser.input_range_map, false) ++ || !shader_signature_merge(output_signature, normaliser.output_range_map, false) ++ || !shader_signature_merge(patch_constant_signature, normaliser.pc_range_map, true)) ++ { ++ *instructions = normaliser.instructions; ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ normaliser.phase = VKD3DSIH_INVALID; ++ for (i = 0; i < normaliser.instructions.count; ++i) ++ shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], &normaliser); ++ ++ *instructions = normaliser.instructions; ++ return VKD3D_OK; ++} +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h +index 4860cf5f90e..e1cb75e177c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.h ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h +@@ -22,7 +22,7 @@ + #define __VKD3D_SHADER_PREPROC_H + + #include "vkd3d_shader_private.h" +-#include "wine/rbtree.h" ++#include "rbtree.h" + + struct preproc_if_state + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l +index bb5a6b61de1..8e35e3555ba 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.l ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l +@@ -20,7 +20,6 @@ + + %{ + +-#include "preproc.h" + #include "preproc.tab.h" + + #undef ERROR /* defined in wingdi.h */ +@@ -41,6 +40,7 @@ static void update_location(struct preproc_ctx *ctx); + %option bison-locations + %option extra-type="struct preproc_ctx *" + %option never-interactive ++%option nodefault + %option noinput + %option nounput + %option noyy_top_state +@@ -75,6 +75,7 @@ INT_SUFFIX [uUlL]{0,2} + "*/" {yy_pop_state(yyscanner);} + <> {yy_pop_state(yyscanner);} + . {} ++\n {} + + (\\{NEWLINE}|[^\n])* {return T_STRING;} + +@@ -176,9 +177,9 @@ INT_SUFFIX [uUlL]{0,2} + return T_NEWLINE; + } + +-{WS}+ {} ++{WS}+ {} + [-()\[\]{},+!*/<>&|^?:] {return yytext[0];} +-. {return T_TEXT;} ++. 
{return T_TEXT;}
+ 
+ %%
+ 
+diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c
+index 53e13735937..cc0b63e8284 100644
+--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c
++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c
+@@ -18,7 +18,7 @@
+  */
+ 
+ #include "vkd3d_shader_private.h"
+-#include "wine/rbtree.h"
++#include "rbtree.h"
+ 
+ #include <stdarg.h>
+ #include <stdio.h>
+@@ -168,7 +168,7 @@ static void vkd3d_spirv_validate(const struct vkd3d_shader_code *spirv,
+ 
+ #endif /* HAVE_SPIRV_TOOLS */
+ 
+-static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval,
++enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval,
+         unsigned int index)
+ {
+     switch (sysval)
+@@ -199,11 +199,6 @@ static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enu
+     }
+ }
+ 
+-static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_shader_sysval_semantic sysval)
+-{
+-    return vkd3d_siv_from_sysval_indexed(sysval, 0);
+-}
+-
+ #define VKD3D_SPIRV_VERSION 0x00010000
+ #define VKD3D_SPIRV_GENERATOR_ID 18
+ #define VKD3D_SPIRV_GENERATOR_VERSION 7
+@@ -1967,11 +1962,9 @@ struct vkd3d_symbol_register_data
+     uint32_t member_idx;
+     enum vkd3d_shader_component_type component_type;
+     unsigned int write_mask;
+-    uint32_t dcl_mask;
+     unsigned int structure_stride;
+     unsigned int binding_base_idx;
+     bool is_aggregate; /* An aggregate, i.e. a structure or an array. */
+-    bool is_dynamically_indexed; /* If member_idx is a variable ID instead of a constant. */
+ };
+ 
+ struct vkd3d_symbol_resource_data
+@@ -2064,10 +2057,14 @@ static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol,
+     symbol->type = VKD3D_SYMBOL_REGISTER;
+     memset(&symbol->key, 0, sizeof(symbol->key));
+     symbol->key.reg.type = reg->type;
+-    if (vkd3d_shader_register_is_input(reg) && reg->idx[1].offset != ~0u)
+-        symbol->key.reg.idx = reg->idx[1].offset;
++    if (vkd3d_shader_register_is_input(reg) || vkd3d_shader_register_is_output(reg)
++            || vkd3d_shader_register_is_patch_constant(reg))
++    {
++        symbol->key.reg.idx = reg->idx_count ? reg->idx[reg->idx_count - 1].offset : ~0u;
++        assert(!reg->idx_count || symbol->key.reg.idx != ~0u);
++    }
+     else if (reg->type != VKD3DSPR_IMMCONSTBUFFER)
+-        symbol->key.reg.idx = reg->idx[0].offset;
++        symbol->key.reg.idx = reg->idx_count ? 
reg->idx[0].offset : ~0u; + } + + static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, +@@ -2080,11 +2077,9 @@ static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, + symbol->info.reg.member_idx = 0; + symbol->info.reg.component_type = component_type; + symbol->info.reg.write_mask = write_mask; +- symbol->info.reg.dcl_mask = 0; + symbol->info.reg.structure_stride = 0; + symbol->info.reg.binding_base_idx = 0; + symbol->info.reg.is_aggregate = false; +- symbol->info.reg.is_dynamically_indexed = false; + } + + static void vkd3d_symbol_make_resource(struct vkd3d_symbol *symbol, +@@ -2197,11 +2192,7 @@ struct vkd3d_push_constant_buffer_binding + + struct vkd3d_shader_phase + { +- enum vkd3d_shader_opcode type; +- unsigned int idx; +- unsigned int instance_count; + uint32_t function_id; +- uint32_t instance_id; + size_t function_location; + }; + +@@ -2253,10 +2244,11 @@ struct spirv_compiler + struct vkd3d_push_constant_buffer_binding *push_constants; + const struct vkd3d_shader_spirv_target_info *spirv_target_info; + ++ bool main_block_open; + bool after_declarations_section; +- const struct vkd3d_shader_signature *input_signature; +- const struct vkd3d_shader_signature *output_signature; +- const struct vkd3d_shader_signature *patch_constant_signature; ++ struct shader_signature input_signature; ++ struct shader_signature output_signature; ++ struct shader_signature patch_constant_signature; + const struct vkd3d_shader_transform_feedback_info *xfb_info; + struct vkd3d_shader_output_info + { +@@ -2276,9 +2268,10 @@ struct spirv_compiler + unsigned int output_control_point_count; + bool use_vocp; + +- unsigned int shader_phase_count; +- struct vkd3d_shader_phase *shader_phases; +- size_t shader_phases_size; ++ enum vkd3d_shader_opcode phase; ++ bool emit_default_control_point_phase; ++ struct vkd3d_shader_phase control_point_phase; ++ struct vkd3d_shader_phase patch_constant_phase; + + uint32_t current_spec_constant_id; + unsigned int spec_constant_count; +@@ -2290,9 +2283,19 @@ struct spirv_compiler + struct vkd3d_string_buffer_cache string_buffers; + }; + +-static bool is_control_point_phase(const struct vkd3d_shader_phase *phase) ++static bool is_in_default_phase(const struct spirv_compiler *compiler) ++{ ++ return compiler->phase == VKD3DSIH_INVALID; ++} ++ ++static bool is_in_control_point_phase(const struct spirv_compiler *compiler) ++{ ++ return compiler->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; ++} ++ ++static bool is_in_fork_or_join_phase(const struct spirv_compiler *compiler) + { +- return phase && phase->type == VKD3DSIH_HS_CONTROL_POINT_PHASE; ++ return compiler->phase == VKD3DSIH_HS_FORK_PHASE || compiler->phase == VKD3DSIH_HS_JOIN_PHASE; + } + + static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler); +@@ -2304,13 +2307,33 @@ static const char *spirv_compiler_get_entry_point_name(const struct spirv_compil + return info && info->entry_point ? 
info->entry_point : "main"; + } + +-struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, ++static void spirv_compiler_destroy(struct spirv_compiler *compiler) ++{ ++ vkd3d_free(compiler->control_flow_info); ++ ++ vkd3d_free(compiler->output_info); ++ ++ vkd3d_free(compiler->push_constants); ++ vkd3d_free(compiler->descriptor_offset_ids); ++ ++ vkd3d_spirv_builder_free(&compiler->spirv_builder); ++ ++ rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL); ++ ++ vkd3d_free(compiler->spec_constants); ++ ++ vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); ++ ++ vkd3d_free(compiler); ++} ++ ++static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, + const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) + { +- const struct vkd3d_shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; +- const struct vkd3d_shader_signature *output_signature = &shader_desc->output_signature; ++ const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; ++ const struct shader_signature *output_signature = &shader_desc->output_signature; + const struct vkd3d_shader_interface_info *shader_interface; + const struct vkd3d_shader_descriptor_offset_info *offset_info; + const struct vkd3d_shader_spirv_target_info *target_info; +@@ -2402,9 +2425,9 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * + + compiler->shader_type = shader_version->type; + +- compiler->input_signature = &shader_desc->input_signature; +- compiler->output_signature = &shader_desc->output_signature; +- compiler->patch_constant_signature = &shader_desc->patch_constant_signature; ++ compiler->input_signature = shader_desc->input_signature; ++ compiler->output_signature = shader_desc->output_signature; ++ compiler->patch_constant_signature = shader_desc->patch_constant_signature; + + if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) + { +@@ -2437,6 +2460,8 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * + + compiler->scan_descriptor_info = scan_descriptor_info; + ++ compiler->phase = VKD3DSIH_INVALID; ++ + vkd3d_string_buffer_cache_init(&compiler->string_buffers); + + spirv_compiler_emit_initial_declarations(compiler); +@@ -2857,7 +2882,7 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s + { + unsigned int idx; + +- idx = reg->idx[1].offset != ~0u ? reg->idx[1].offset : reg->idx[0].offset; ++ idx = reg->idx_count ? 
reg->idx[reg->idx_count - 1].offset : 0; + switch (reg->type) + { + case VKD3DSPR_RESOURCE: +@@ -2887,12 +2912,6 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s + case VKD3DSPR_DEPTHOUTLE: + snprintf(buffer, buffer_size, "oDepth"); + break; +- case VKD3DSPR_FORKINSTID: +- snprintf(buffer, buffer_size, "vForkInstanceId"); +- break; +- case VKD3DSPR_JOININSTID: +- snprintf(buffer, buffer_size, "vJoinInstanceId"); +- break; + case VKD3DSPR_GSINSTID: + snprintf(buffer, buffer_size, "vGSInstanceID"); + break; +@@ -2965,18 +2984,26 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, + + static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler, + struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, +- enum vkd3d_shader_component_type component_type, unsigned int component_count, unsigned int array_length) ++ enum vkd3d_shader_component_type component_type, unsigned int component_count, ++ const unsigned int *array_lengths, unsigned int length_count) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, length_id, ptr_type_id; ++ unsigned int i; + +- if (!array_length) ++ if (!length_count) + return spirv_compiler_emit_variable(compiler, + stream, storage_class, component_type, component_count); + + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); +- length_id = spirv_compiler_get_constant_uint(compiler, array_length); +- type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); ++ for (i = 0; i < length_count; ++i) ++ { ++ if (!array_lengths[i]) ++ continue; ++ length_id = spirv_compiler_get_constant_uint(compiler, array_lengths[i]); ++ type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); ++ } ++ + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); + return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); + } +@@ -3169,7 +3196,6 @@ struct vkd3d_shader_register_info + unsigned int structure_stride; + unsigned int binding_base_idx; + bool is_aggregate; +- bool is_dynamically_indexed; + }; + + static bool spirv_compiler_get_register_info(const struct spirv_compiler *compiler, +@@ -3192,7 +3218,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil + register_info->structure_stride = 0; + register_info->binding_base_idx = 0; + register_info->is_aggregate = false; +- register_info->is_dynamically_indexed = false; + return true; + } + +@@ -3214,7 +3239,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil + register_info->structure_stride = symbol->info.reg.structure_stride; + register_info->binding_base_idx = symbol->info.reg.binding_base_idx; + register_info->is_aggregate = symbol->info.reg.is_aggregate; +- register_info->is_dynamically_indexed = symbol->info.reg.is_dynamically_indexed; + + return true; + } +@@ -3344,41 +3368,22 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp + } + else if (register_info->is_aggregate) + { +- if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_INCONTROLPOINT) +- { +- /* Indices for these are swapped compared to the generated SPIR-V. 
*/
+-            if (reg->idx[1].offset != ~0u)
+-                indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[1]);
+-            if (reg->idx[0].offset != ~0u)
+-                indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]);
+-        }
+-        else
+-        {
+-            struct vkd3d_shader_register_index reg_idx = reg->idx[0];
+-
+-            if (reg->idx[1].rel_addr)
+-                FIXME("Relative addressing not implemented.\n");
+-
+-            if (register_info->is_dynamically_indexed)
+-            {
+-                indexes[index_count++] = vkd3d_spirv_build_op_load(builder,
+-                        vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, 1),
+-                        register_info->member_idx, SpvMemoryAccessMaskNone);
+-            }
+-            else
+-            {
+-                reg_idx.offset = register_info->member_idx;
+-                indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg_idx);
+-            }
+-        }
++        /* Indices for these are swapped compared to the generated SPIR-V. */
++        if (reg->idx_count > 2)
++            indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[1]);
++        if (reg->idx_count > 1)
++            indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]);
++        if (!index_count)
++            /* A register sysval which is an array in SPIR-V, e.g. SAMPLEMASK. */
++            indexes[index_count++] = spirv_compiler_get_constant_uint(compiler, 0);
+     }
+     else
+     {
+-        if (reg->idx[1].rel_addr || (reg->idx[1].offset == ~0u && reg->idx[0].rel_addr))
++        if (reg->idx_count && reg->idx[reg->idx_count - 1].rel_addr)
+             FIXME("Relative addressing not implemented.\n");
+ 
+         /* Handle arrayed registers, e.g. v[3][0]. */
+-        if (reg->idx[1].offset != ~0u && !register_is_descriptor(reg))
++        if (reg->idx_count > 1 && !register_is_descriptor(reg))
+             indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]);
+     }
+ 
+@@ -4249,35 +4254,12 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp
+     if ((builtin = get_spirv_builtin_for_register(reg_type)))
+         return builtin;
+ 
+-    if (sysval != VKD3D_SIV_NONE || (reg_type != VKD3DSPR_OUTPUT && reg_type != VKD3DSPR_COLOROUT))
++    if (sysval != VKD3D_SIV_NONE || (reg_type != VKD3DSPR_OUTPUT && reg_type != VKD3DSPR_COLOROUT
++            && reg_type != VKD3DSPR_PATCHCONST))
+         FIXME("Unhandled builtin (register type %#x, sysval %#x).\n", reg_type, sysval);
+     return NULL;
+ }
+ 
+-static const struct vkd3d_shader_signature_element *vkd3d_find_signature_element_for_reg(
+-        const struct vkd3d_shader_signature *signature, unsigned int *signature_element_index,
+-        unsigned int reg_idx, DWORD write_mask)
+-{
+-    unsigned int signature_idx;
+-
+-    for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx)
+-    {
+-        if (signature->elements[signature_idx].register_index == reg_idx
+-                && (signature->elements[signature_idx].mask & write_mask) == write_mask)
+-        {
+-            if (signature_element_index)
+-                *signature_element_index = signature_idx;
+-            return &signature->elements[signature_idx];
+-        }
+-    }
+-
+-    FIXME("Could not find shader signature element (register %u, write mask %#x).\n",
+-            reg_idx, write_mask);
+-    if (signature_element_index)
+-        *signature_element_index = ~0u;
+-    return NULL;
+-}
+-
+ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler)
+ {
+     struct vkd3d_shader_register r;
+@@ -4288,6 +4270,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler
+     r.type = VKD3DSPR_OUTPOINTID;
+     r.idx[0].offset = ~0u;
+     r.idx[1].offset = ~0u;
++    r.idx_count = 0;
+     return spirv_compiler_get_register_id(compiler, &r);
+ }
+ 
+@@ -4302,7 +4285,7 @@ 
static uint32_t spirv_compiler_emit_load_invocation_id(struct spirv_compiler *co
+ }
+ 
+ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compiler,
+-        uint32_t id, const struct vkd3d_shader_phase *phase, const char *suffix)
++        uint32_t id, const char *suffix)
+ {
+     struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
+     const char *name;
+@@ -4310,7 +4293,7 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compile
+     if (!suffix)
+         suffix = "";
+ 
+-    switch (phase->type)
++    switch (compiler->phase)
+     {
+         case VKD3DSIH_HS_CONTROL_POINT_PHASE:
+             name = "control";
+@@ -4322,62 +4305,23 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compile
+             name = "join";
+             break;
+         default:
+-            ERR("Invalid phase type %#x.\n", phase->type);
++            ERR("Invalid phase type %#x.\n", compiler->phase);
+             return;
+     }
+-    vkd3d_spirv_build_op_name(builder, id, "%s%u%s", name, phase->idx, suffix);
+-}
+-
+-static void spirv_compiler_begin_shader_phase(struct spirv_compiler *compiler,
+-        struct vkd3d_shader_phase *phase)
+-{
+-    struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
+-    uint32_t void_id, function_type_id;
+-    unsigned int param_count;
+-    uint32_t param_type_id;
+-
+-    if (phase->instance_count)
+-    {
+-        param_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1);
+-        param_count = 1;
+-    }
+-    else
+-    {
+-        param_count = 0;
+-    }
+-
+-    phase->function_id = vkd3d_spirv_alloc_id(builder);
+-
+-    void_id = vkd3d_spirv_get_op_type_void(builder);
+-    function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, &param_type_id, param_count);
+-    vkd3d_spirv_build_op_function(builder, void_id, phase->function_id,
+-            SpvFunctionControlMaskNone, function_type_id);
+-
+-    if (phase->instance_count)
+-        phase->instance_id = vkd3d_spirv_build_op_function_parameter(builder, param_type_id);
+-
+-    vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder));
+-    phase->function_location = vkd3d_spirv_stream_current_location(&builder->function_stream);
+-
+-    spirv_compiler_emit_shader_phase_name(compiler, phase->function_id, phase, NULL);
++    vkd3d_spirv_build_op_name(builder, id, "%s%s", name, suffix);
+ }
+ 
+ static const struct vkd3d_shader_phase *spirv_compiler_get_current_shader_phase(
+         struct spirv_compiler *compiler)
+ {
+-    struct vkd3d_shader_phase *phase;
+-
+-    if (!compiler->shader_phase_count)
++    if (is_in_default_phase(compiler))
+         return NULL;
+ 
+-    phase = &compiler->shader_phases[compiler->shader_phase_count - 1];
+-    if (!phase->function_id)
+-        spirv_compiler_begin_shader_phase(compiler, phase);
+-    return phase;
++    return is_in_control_point_phase(compiler) ? 
&compiler->control_point_phase : &compiler->patch_constant_phase; + } + + static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler, +- uint32_t id, unsigned int component_count, const struct vkd3d_shader_signature_element *signature_element) ++ uint32_t id, unsigned int component_count, const struct signature_element *signature_element) + { + const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; + const struct vkd3d_shader_transform_feedback_element *xfb_element; +@@ -4436,17 +4380,21 @@ static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler, + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationOffset, offset); + } + +-static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *compiler, +- const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, unsigned int array_size) ++static uint32_t spirv_compiler_emit_builtin_variable_v(struct spirv_compiler *compiler, ++ const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, const unsigned int *array_sizes, ++ unsigned int size_count) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ unsigned int sizes[2]; + uint32_t id; + +- array_size = max(array_size, builtin->spirv_array_size); ++ assert(size_count <= ARRAY_SIZE(sizes)); ++ memcpy(sizes, array_sizes, size_count * sizeof(sizes[0])); ++ array_sizes = sizes; ++ sizes[0] = max(sizes[0], builtin->spirv_array_size); + +- id = spirv_compiler_emit_array_variable(compiler, +- &builder->global_stream, storage_class, +- builtin->component_type, builtin->component_count, array_size); ++ id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, storage_class, ++ builtin->component_type, builtin->component_count, array_sizes, size_count); + vkd3d_spirv_add_iface_variable(builder, id); + spirv_compiler_decorate_builtin(compiler, id, builtin->spirv_builtin); + +@@ -4458,54 +4406,45 @@ static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *comp + return id; + } + +-static bool needs_private_io_variable(const struct vkd3d_shader_signature *signature, +- unsigned int reg_idx, const struct vkd3d_spirv_builtin *builtin, +- unsigned int *component_count, unsigned int *out_write_mask) ++static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *compiler, ++ const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, unsigned int array_size) + { +- unsigned int write_mask = 0; +- bool have_sysval = false; +- unsigned int i, count; +- +- /* Always use private variables for arrayed builtins. These are generally +- * scalars on the D3D side, so would need extra array indices when +- * accessing them. It may be feasible to insert those indices at the point +- * where the builtins are used, but it's not clear it's worth the effort. 
*/ +- if (builtin && (builtin->spirv_array_size || builtin->fixup_pfn)) +- return true; +- +- if (*component_count == VKD3D_VEC4_SIZE) +- return false; +- +- for (i = 0, count = 0; i < signature->element_count; ++i) +- { +- const struct vkd3d_shader_signature_element *current = &signature->elements[i]; ++ return spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, &array_size, 1); ++} + +- if (current->register_index != reg_idx) +- continue; ++static bool needs_private_io_variable(const struct vkd3d_spirv_builtin *builtin) ++{ ++ return builtin && builtin->fixup_pfn; ++} + +- write_mask |= current->mask; +- ++count; ++static unsigned int shader_signature_next_location(const struct shader_signature *signature) ++{ ++ unsigned int i, max_row; + +- if (current->sysval_semantic) +- have_sysval = true; +- } ++ if (!signature) ++ return 0; + +- if (count == 1) +- return false; ++ for (i = 0, max_row = 0; i < signature->element_count; ++i) ++ max_row = max(max_row, signature->elements[i].register_index + signature->elements[i].register_count); ++ return max_row; ++} + +- if (builtin || have_sysval) +- return true; ++static unsigned int shader_register_get_io_indices(const struct vkd3d_shader_register *reg, ++ unsigned int *array_sizes) ++{ ++ unsigned int i, element_idx; + +- if (!vkd3d_bitmask_is_contiguous(write_mask)) ++ array_sizes[0] = 0; ++ array_sizes[1] = 0; ++ element_idx = reg->idx[0].offset; ++ for (i = 1; i < reg->idx_count; ++i) + { +- FIXME("Write mask %#x is non-contiguous.\n", write_mask); +- return true; ++ array_sizes[1] = array_sizes[0]; ++ array_sizes[0] = element_idx; ++ element_idx = reg->idx[i].offset; + } + +- assert(vkd3d_write_mask_component_count(write_mask) >= *component_count); +- *component_count = vkd3d_write_mask_component_count(write_mask); +- *out_write_mask = write_mask; +- return false; ++ return element_idx; + } + + static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, +@@ -4513,48 +4452,32 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, + enum vkd3d_shader_interpolation_mode interpolation_mode) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_signature_element *signature_element; +- const struct vkd3d_shader_signature *shader_signature; + const struct vkd3d_shader_register *reg = &dst->reg; + unsigned int component_idx, input_component_count; ++ const struct signature_element *signature_element; ++ const struct shader_signature *shader_signature; + enum vkd3d_shader_component_type component_type; + uint32_t type_id, ptr_type_id, float_type_id; + const struct vkd3d_spirv_builtin *builtin; ++ unsigned int write_mask, reg_write_mask; + struct vkd3d_symbol *symbol = NULL; + uint32_t val_id, input_id, var_id; + struct vkd3d_symbol reg_symbol; +- struct vkd3d_symbol tmp_symbol; + SpvStorageClass storage_class; + struct rb_entry *entry = NULL; + bool use_private_var = false; +- unsigned int write_mask; +- unsigned int array_size; +- unsigned int reg_idx; ++ unsigned int array_sizes[2]; ++ unsigned int element_idx; + uint32_t i, index; + +- assert(!reg->idx[0].rel_addr); +- assert(!reg->idx[1].rel_addr); +- +- if (reg->idx[1].offset != ~0u) +- { +- array_size = reg->idx[0].offset; +- reg_idx = reg->idx[1].offset; +- } +- else +- { +- array_size = 0; +- reg_idx = reg->idx[0].offset; +- } ++ assert(!reg->idx_count || !reg->idx[0].rel_addr); ++ assert(reg->idx_count < 2 || !reg->idx[1].rel_addr); + + shader_signature = reg->type == VKD3DSPR_PATCHCONST 
+-            ? compiler->patch_constant_signature : compiler->input_signature;
++            ? &compiler->patch_constant_signature : &compiler->input_signature;
+ 
+-    if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature,
+-            NULL, reg_idx, dst->write_mask)))
+-    {
+-        FIXME("No signature element for shader input, ignoring shader input.\n");
+-        return 0;
+-    }
++    element_idx = shader_register_get_io_indices(reg, array_sizes);
++    signature_element = &shader_signature->elements[element_idx];
+ 
+     if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && !sysval && signature_element->sysval_semantic)
+         sysval = vkd3d_siv_from_sysval(signature_element->sysval_semantic);
+@@ -4576,12 +4499,16 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler,
+         component_idx = vkd3d_write_mask_get_component_idx(signature_element->mask);
+     }
+ 
+-    if (needs_private_io_variable(shader_signature, reg_idx, builtin, &input_component_count, &write_mask)
+-            && (compiler->shader_type != VKD3D_SHADER_TYPE_HULL
+-            || (reg->type != VKD3DSPR_INCONTROLPOINT && reg->type != VKD3DSPR_PATCHCONST)))
++    if (needs_private_io_variable(builtin))
++    {
+         use_private_var = true;
++        reg_write_mask = write_mask;
++    }
+     else
++    {
+         component_idx = vkd3d_write_mask_get_component_idx(write_mask);
++        reg_write_mask = write_mask >> component_idx;
++    }
+ 
+     storage_class = SpvStorageClassInput;
+ 
+     vkd3d_symbol_make_register(&reg_symbol, reg);
+ 
+     if ((entry = rb_get(&compiler->symbol_table, &reg_symbol)))
+     {
++        /* Except for vicp there should be one declaration per signature element. Sources of
++         * duplicate declarations are: a single register split into multiple declarations having
++         * different components, which should have been merged, and declarations in one phase
++         * being repeated in another (i.e. vcp/vocp), which should have been deleted. */
++        if (reg->type != VKD3DSPR_INPUT || !is_in_fork_or_join_phase(compiler))
++            FIXME("Duplicate input definition found.\n");
+         symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry);
+-        input_id = symbol->id;
+-    }
+-    else if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL
+-            && (reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST))
+-    {
+-        /* Input/output registers from one phase can be used as inputs in
+-         * subsequent phases. Specifically:
+-         *
+-         *   - Control phase inputs are available as "vicp" in fork and join
+-         *     phases.
+-         *   - Control phase outputs are available as "vocp" in fork and join
+-         *     phases.
+-         *   - Fork phase patch constants are available as "vpc" in join
+-         *     phases.
+-         *
+-         * We handle "vicp" and "vpc" here by creating aliases to the shader's
+-         * global inputs and outputs. We handle "vocp" in
+-         * spirv_compiler_leave_shader_phase(). 
*/
+-
+-        tmp_symbol = reg_symbol;
+-        if (reg->type == VKD3DSPR_PATCHCONST)
+-            tmp_symbol.key.reg.type = VKD3DSPR_OUTPUT;
+-        else
+-            tmp_symbol.key.reg.type = VKD3DSPR_INPUT;
+-
+-        if ((entry = rb_get(&compiler->symbol_table, &tmp_symbol)))
+-        {
+-            symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry);
+-            tmp_symbol = *symbol;
+-            tmp_symbol.key.reg.type = reg->type;
+-            spirv_compiler_put_symbol(compiler, &tmp_symbol);
+-
+-            input_id = symbol->id;
+-        }
+-        else
+-        {
+-            if (reg->type == VKD3DSPR_PATCHCONST)
+-                ERR("Patch constant register %u was not declared in a previous phase.\n", reg_idx);
+-            else
+-                ERR("Input control point register %u was not declared in a previous phase.\n", reg_idx);
+-        }
++        return symbol->id;
+     }
+ 
+-    if (!symbol || ~symbol->info.reg.dcl_mask & write_mask)
++    if (builtin)
+     {
+-        if (builtin)
+-        {
+-            input_id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size);
+-            if (reg->type == VKD3DSPR_PATCHCONST)
+-                vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0);
+-        }
+-        else
+-        {
+-            unsigned int location = reg_idx;
+-
+-            input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream,
+-                    storage_class, component_type, input_component_count, array_size);
+-            vkd3d_spirv_add_iface_variable(builder, input_id);
+-            if (reg->type == VKD3DSPR_PATCHCONST)
+-            {
+-                vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0);
+-                location += compiler->input_signature->element_count;
+-            }
+-            vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location);
+-            if (component_idx)
+-                vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationComponent, component_idx);
+-
+-            spirv_compiler_emit_interpolation_decorations(compiler, input_id, interpolation_mode);
+-        }
++        input_id = spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, array_sizes, 2);
++        if (reg->type == VKD3DSPR_PATCHCONST)
++            vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0);
+     }
+-
+-    if (!symbol)
++    else
+     {
+-        var_id = input_id;
+-        if (use_private_var)
++        unsigned int location = signature_element->register_index;
++
++        input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream,
++                storage_class, component_type, input_component_count, array_sizes, 2);
++        vkd3d_spirv_add_iface_variable(builder, input_id);
++        if (reg->type == VKD3DSPR_PATCHCONST)
+         {
+-            storage_class = SpvStorageClassPrivate;
+-            var_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream,
+-                    storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, array_size);
++            vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0);
++            location += shader_signature_next_location(&compiler->input_signature);
+         }
++        vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location);
++        if (component_idx)
++            vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationComponent, component_idx);
+ 
+-        vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class,
+-                use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type,
+-                use_private_var ? 
VKD3DSP_WRITEMASK_ALL : write_mask); +- reg_symbol.info.reg.dcl_mask |= write_mask; +- spirv_compiler_put_symbol(compiler, ®_symbol); +- +- spirv_compiler_emit_register_debug_name(builder, var_id, reg); ++ spirv_compiler_emit_interpolation_decorations(compiler, input_id, interpolation_mode); + } +- else ++ ++ var_id = input_id; ++ if (use_private_var) + { +- symbol->info.reg.dcl_mask |= write_mask; ++ storage_class = SpvStorageClassPrivate; ++ var_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, ++ storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, array_sizes, 2); + } + ++ vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, ++ use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, ++ use_private_var ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); ++ reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; ++ assert(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); ++ spirv_compiler_put_symbol(compiler, ®_symbol); ++ ++ spirv_compiler_emit_register_debug_name(builder, var_id, reg); ++ + if (use_private_var) + { + type_id = vkd3d_spirv_get_type_id(builder, component_type, input_component_count); +- for (i = 0; i < max(array_size, 1); ++i) ++ for (i = 0; i < max(array_sizes[0], 1); ++i) + { + struct vkd3d_shader_register dst_reg = *reg; + dst_reg.data_type = VKD3D_DATA_FLOAT; + + val_id = input_id; +- if (array_size) ++ if (array_sizes[0]) + { + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); + index = spirv_compiler_get_constant_uint(compiler, i); +@@ -4708,7 +4592,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); + index = spirv_compiler_get_constant_uint(compiler, builtin->member_idx); + val_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, ptr_type_id, input_id, index); +- dst_reg.idx[0].offset = reg_idx + i; ++ dst_reg.idx[0].offset = element_idx + i; + } + val_id = vkd3d_spirv_build_op_load(builder, type_id, val_id, SpvMemoryAccessMaskNone); + +@@ -4743,9 +4627,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, + uint32_t write_mask; + uint32_t input_id; + +- assert(!reg->idx[0].rel_addr); +- assert(!reg->idx[1].rel_addr); +- assert(reg->idx[1].offset == ~0u); ++ assert(!reg->idx_count || !reg->idx[0].rel_addr); ++ assert(reg->idx_count < 2); + + if (!(builtin = get_spirv_builtin_for_register(reg->type))) + { +@@ -4763,19 +4646,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, + write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); + vkd3d_symbol_set_register_info(®_symbol, input_id, + SpvStorageClassInput, builtin->component_type, write_mask); +- reg_symbol.info.reg.dcl_mask = write_mask; + reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; + spirv_compiler_put_symbol(compiler, ®_symbol); + spirv_compiler_emit_register_debug_name(builder, input_id, reg); + } + + static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compiler, +- const struct vkd3d_shader_phase *phase, const struct vkd3d_shader_dst_param *dst) ++ const struct vkd3d_shader_dst_param *dst) + { +- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_register *reg = &dst->reg; +- struct vkd3d_symbol reg_symbol; +- uint32_t val_id; + + switch (reg->type) + { +@@ -4787,10 +4666,6 @@ static 
void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil + case VKD3DSPR_PRIMID: + spirv_compiler_emit_input_register(compiler, dst); + return; +- case VKD3DSPR_FORKINSTID: +- case VKD3DSPR_JOININSTID: +- val_id = phase->instance_id; +- break; + case VKD3DSPR_OUTPOINTID: /* Emitted in spirv_compiler_emit_initial_declarations(). */ + case VKD3DSPR_OUTCONTROLPOINT: /* See spirv_compiler_leave_shader_phase(). */ + return; +@@ -4798,22 +4673,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil + FIXME("Unhandled shader phase input register %#x.\n", reg->type); + return; + } +- +- vkd3d_symbol_make_register(®_symbol, reg); +- vkd3d_symbol_set_register_info(®_symbol, val_id, +- SpvStorageClassMax /* Intermediate value */, +- VKD3D_SHADER_COMPONENT_UINT, VKD3DSP_WRITEMASK_0); +- spirv_compiler_put_symbol(compiler, ®_symbol); +- spirv_compiler_emit_register_debug_name(builder, val_id, reg); +-} +- +-static unsigned int spirv_compiler_get_output_variable_index( +- struct spirv_compiler *compiler, unsigned int register_idx) +-{ +- if (register_idx == ~0u) /* oDepth */ +- return ARRAY_SIZE(compiler->private_output_variable) - 1; +- assert(register_idx < ARRAY_SIZE(compiler->private_output_variable) - 1); +- return register_idx; + } + + static unsigned int get_shader_output_swizzle(const struct spirv_compiler *compiler, +@@ -4835,8 +4694,7 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) + return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && info && info->dual_source_blending; + } + +-static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signature_element *e, +- uint32_t *mask) ++static void calculate_clip_or_cull_distance_mask(const struct signature_element *e, uint32_t *mask) + { + if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) + { +@@ -4847,38 +4705,10 @@ static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signa + *mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); + } + +-static uint32_t calculate_sysval_array_mask(struct spirv_compiler *compiler, +- const struct vkd3d_shader_signature *signature, enum vkd3d_shader_input_sysval_semantic sysval) +-{ +- const struct vkd3d_shader_signature_element *e; +- const struct vkd3d_spirv_builtin *sig_builtin; +- const struct vkd3d_spirv_builtin *builtin; +- uint32_t signature_idx, mask = 0; +- +- if (!(builtin = get_spirv_builtin_for_sysval(compiler, sysval))) +- { +- FIXME("Unhandled sysval %#x.\n", sysval); +- return 0; +- } +- +- for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) +- { +- e = &signature->elements[signature_idx]; +- +- sig_builtin = get_spirv_builtin_for_sysval(compiler, +- vkd3d_siv_from_sysval_indexed(e->sysval_semantic, e->semantic_index)); +- +- if (sig_builtin && sig_builtin->spirv_builtin == builtin->spirv_builtin) +- mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * sig_builtin->member_idx); +- } +- +- return mask; +-} +- + /* Emits arrayed SPIR-V built-in variables. 
*/ + static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) + { +- const struct vkd3d_shader_signature *output_signature = compiler->output_signature; ++ const struct shader_signature *output_signature = &compiler->output_signature; + uint32_t clip_distance_mask = 0, clip_distance_id = 0; + uint32_t cull_distance_mask = 0, cull_distance_id = 0; + const struct vkd3d_spirv_builtin *builtin; +@@ -4886,7 +4716,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * + + for (i = 0; i < output_signature->element_count; ++i) + { +- const struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; ++ const struct signature_element *e = &output_signature->elements[i]; + + switch (e->sysval_semantic) + { +@@ -4921,7 +4751,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * + + for (i = 0; i < output_signature->element_count; ++i) + { +- const struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; ++ const struct signature_element *e = &output_signature->elements[i]; + + switch (e->sysval_semantic) + { +@@ -4953,9 +4783,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, + uint32_t write_mask; + uint32_t output_id; + +- assert(!reg->idx[0].rel_addr); +- assert(!reg->idx[1].rel_addr); +- assert(reg->idx[1].offset == ~0u); ++ assert(!reg->idx_count || !reg->idx[0].rel_addr); ++ assert(reg->idx_count < 2); + + if (!(builtin = get_spirv_builtin_for_register(reg->type))) + { +@@ -4969,7 +4798,6 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, + write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); + vkd3d_symbol_set_register_info(®_symbol, output_id, + SpvStorageClassOutput, builtin->component_type, write_mask); +- reg_symbol.info.reg.dcl_mask = write_mask; + reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; + spirv_compiler_put_symbol(compiler, ®_symbol); + spirv_compiler_emit_register_execution_mode(compiler, reg); +@@ -4977,7 +4805,7 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, + } + + static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler, +- const struct vkd3d_shader_phase *phase, const struct vkd3d_spirv_builtin *builtin) ++ const struct vkd3d_spirv_builtin *builtin) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t *variable_id, id; +@@ -4993,7 +4821,7 @@ static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_c + return *variable_id; + + id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0); +- if (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE) ++ if (is_in_fork_or_join_phase(compiler)) + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); + + if (variable_id) +@@ -5005,44 +4833,34 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + const struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_input_sysval_semantic sysval) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_signature_element *signature_element; +- const struct vkd3d_shader_signature *shader_signature; + const struct vkd3d_shader_register *reg = &dst->reg; + unsigned int component_idx, output_component_count; ++ const struct signature_element *signature_element; + enum vkd3d_shader_component_type 
component_type; ++ const struct shader_signature *shader_signature; + const struct vkd3d_spirv_builtin *builtin; +- const struct vkd3d_shader_phase *phase; +- struct vkd3d_symbol *symbol = NULL; ++ unsigned int write_mask, reg_write_mask; + bool use_private_variable = false; + struct vkd3d_symbol reg_symbol; + SpvStorageClass storage_class; +- struct rb_entry *entry = NULL; +- unsigned int signature_idx; +- unsigned int write_mask; +- unsigned int array_size; ++ unsigned int array_sizes[2]; ++ unsigned int element_idx; + bool is_patch_constant; + uint32_t id, var_id; + +- phase = spirv_compiler_get_current_shader_phase(compiler); +- is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE); +- +- shader_signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; ++ is_patch_constant = is_in_fork_or_join_phase(compiler); + +- array_size = is_control_point_phase(phase) ? compiler->output_control_point_count : 0; ++ shader_signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature; + +- if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature, +- &signature_idx, reg->idx[0].offset, dst->write_mask))) +- { +- FIXME("No signature element for shader output, ignoring shader output.\n"); +- return; +- } ++ element_idx = shader_register_get_io_indices(reg, array_sizes); ++ signature_element = &shader_signature->elements[element_idx]; + + builtin = vkd3d_get_spirv_builtin(compiler, dst->reg.type, sysval); + + write_mask = signature_element->mask; + +- component_idx = vkd3d_write_mask_get_component_idx(dst->write_mask); +- output_component_count = vkd3d_write_mask_component_count(signature_element->mask); ++ component_idx = vkd3d_write_mask_get_component_idx(write_mask); ++ output_component_count = vkd3d_write_mask_component_count(write_mask); + if (builtin) + { + component_type = builtin->component_type; +@@ -5058,128 +4876,103 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + storage_class = SpvStorageClassOutput; + + if (get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE +- || needs_private_io_variable(shader_signature, signature_element->register_index, +- builtin, &output_component_count, &write_mask) +- || is_patch_constant) ++ || (compiler->output_info[element_idx].id && compiler->output_info[element_idx].array_element_mask) ++ || needs_private_io_variable(builtin)) ++ { + use_private_variable = true; ++ reg_write_mask = write_mask; ++ } + else ++ { + component_idx = vkd3d_write_mask_get_component_idx(write_mask); ++ reg_write_mask = write_mask >> component_idx; ++ } + + vkd3d_symbol_make_register(®_symbol, reg); + +- if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) ++ if (rb_get(&compiler->symbol_table, ®_symbol)) + { +- symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); +- id = symbol->id; ++ /* See spirv_compiler_emit_input() for possible causes. 
*/ ++ FIXME("Duplicate output definition found.\n"); ++ return; + } + +- if (!symbol || ~symbol->info.reg.dcl_mask & write_mask) ++ if (compiler->output_info[element_idx].id) + { +- if (compiler->output_info[signature_idx].id) +- { +- id = compiler->output_info[signature_idx].id; +- if (compiler->output_info[signature_idx].array_element_mask) +- use_private_variable = true; +- } +- else if (builtin) +- { +- if (phase) +- id = spirv_compiler_emit_shader_phase_builtin_variable(compiler, phase, builtin); +- else +- id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size); +- +- if (builtin->spirv_array_size) +- compiler->output_info[signature_idx].array_element_mask = +- calculate_sysval_array_mask(compiler, shader_signature, sysval); +- +- spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); +- } ++ id = compiler->output_info[element_idx].id; ++ } ++ else if (builtin) ++ { ++ if (spirv_compiler_get_current_shader_phase(compiler)) ++ id = spirv_compiler_emit_shader_phase_builtin_variable(compiler, builtin); + else +- { +- unsigned int location = reg->idx[0].offset; +- +- if (is_patch_constant) +- location += compiler->output_signature->element_count; +- +- id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, +- storage_class, component_type, output_component_count, array_size); +- vkd3d_spirv_add_iface_variable(builder, id); +- +- if (is_dual_source_blending(compiler) && reg->idx[0].offset < 2) +- { +- vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); +- vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, reg->idx[0].offset); +- } +- else +- { +- vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, location); +- } +- +- if (component_idx) +- vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationComponent, component_idx); +- } ++ id = spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, array_sizes, 2); + +- if (is_patch_constant) +- vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); +- +- spirv_compiler_decorate_xfb_output(compiler, id, output_component_count, signature_element); +- +- compiler->output_info[signature_idx].id = id; +- compiler->output_info[signature_idx].component_type = component_type; ++ spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); + } +- +- if (!symbol) ++ else + { +- var_id = id; +- if (use_private_variable) +- storage_class = SpvStorageClassPrivate; ++ unsigned int location = signature_element->register_index; ++ + if (is_patch_constant) +- var_id = compiler->hs.patch_constants_id; +- else if (use_private_variable) +- var_id = spirv_compiler_emit_variable(compiler, &builder->global_stream, +- storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); ++ location += shader_signature_next_location(&compiler->output_signature); + +- vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, +- use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, +- use_private_variable ? VKD3DSP_WRITEMASK_ALL : write_mask); +- reg_symbol.info.reg.is_aggregate = use_private_variable ? 
is_patch_constant : array_size; +- if (!use_private_variable && is_control_point_phase(phase)) ++ id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, ++ storage_class, component_type, output_component_count, array_sizes, 2); ++ vkd3d_spirv_add_iface_variable(builder, id); ++ ++ if (is_dual_source_blending(compiler) && signature_element->register_index < 2) + { +- reg_symbol.info.reg.member_idx = spirv_compiler_get_invocation_id(compiler); +- reg_symbol.info.reg.is_dynamically_indexed = true; ++ vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); ++ vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, signature_element->register_index); + } +- else if (is_patch_constant) ++ else + { +- reg_symbol.info.reg.member_idx = reg->idx[0].offset; ++ vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, location); + } +- reg_symbol.info.reg.dcl_mask = write_mask; +- +- spirv_compiler_put_symbol(compiler, ®_symbol); + +- if (!is_patch_constant) +- spirv_compiler_emit_register_debug_name(builder, var_id, reg); ++ if (component_idx) ++ vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationComponent, component_idx); + } +- else ++ ++ if (is_patch_constant) ++ vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); ++ ++ spirv_compiler_decorate_xfb_output(compiler, id, output_component_count, signature_element); ++ ++ compiler->output_info[element_idx].id = id; ++ compiler->output_info[element_idx].component_type = component_type; ++ ++ var_id = id; ++ if (use_private_variable) + { +- symbol->info.reg.dcl_mask |= write_mask; +- var_id = symbol->id; ++ storage_class = SpvStorageClassPrivate; ++ var_id = spirv_compiler_emit_variable(compiler, &builder->global_stream, ++ storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + } + ++ vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, ++ use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, ++ use_private_variable ? 
VKD3DSP_WRITEMASK_ALL : reg_write_mask); ++ reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; ++ assert(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); ++ ++ spirv_compiler_put_symbol(compiler, ®_symbol); ++ ++ if (!is_patch_constant) ++ spirv_compiler_emit_register_debug_name(builder, var_id, reg); ++ + if (use_private_variable) + { +- unsigned int idx = spirv_compiler_get_output_variable_index(compiler, reg->idx[0].offset); +- compiler->private_output_variable[idx] = var_id; +- compiler->private_output_variable_write_mask[idx] |= dst->write_mask; +- if (is_patch_constant) +- compiler->private_output_variable_array_idx[idx] = spirv_compiler_get_constant_uint( +- compiler, reg->idx[0].offset); ++ compiler->private_output_variable[element_idx] = var_id; ++ compiler->private_output_variable_write_mask[element_idx] |= reg_write_mask; + if (!compiler->epilogue_function_id) + compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder); + } + } + + static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *compiler, +- const struct vkd3d_shader_signature_element *e) ++ const struct signature_element *e) + { + enum vkd3d_shader_input_sysval_semantic sysval; + const struct vkd3d_spirv_builtin *builtin; +@@ -5198,14 +4991,14 @@ static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *com + } + + static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compiler, +- const struct vkd3d_shader_signature *signature, const struct vkd3d_shader_signature_element *output, ++ const struct shader_signature *signature, const struct signature_element *output, + const struct vkd3d_shader_output_info *output_info, + uint32_t output_index_id, uint32_t val_id, unsigned int write_mask) + { + unsigned int dst_write_mask, use_mask, uninit_mask, swizzle, mask; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id, zero_id, ptr_type_id, chain_id, object_id; +- const struct vkd3d_shader_signature_element *element; ++ const struct signature_element *element; + unsigned int i, index, array_idx; + uint32_t output_id; + +@@ -5224,6 +5017,9 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi + use_mask |= element->used_mask; + } + } ++ index = vkd3d_write_mask_get_component_idx(output->mask); ++ dst_write_mask >>= index; ++ use_mask >>= index; + write_mask &= dst_write_mask; + + if (!write_mask) +@@ -5294,22 +5090,19 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * + uint32_t param_type_id[MAX_REG_OUTPUT + 1], param_id[MAX_REG_OUTPUT + 1] = {0}; + uint32_t void_id, type_id, ptr_type_id, function_type_id, function_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_signature *signature; +- const struct vkd3d_shader_phase *phase; ++ const struct shader_signature *signature; + uint32_t output_index_id = 0; + bool is_patch_constant; + unsigned int i, count; +- DWORD variable_idx; + + STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(param_id)); + STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(param_type_id)); + STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(compiler->private_output_variable_array_idx)); + STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(compiler->private_output_variable_write_mask)); + +- phase = 
spirv_compiler_get_current_shader_phase(compiler); +- is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE); ++ is_patch_constant = is_in_fork_or_join_phase(compiler); + +- signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; ++ signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature; + + function_id = compiler->epilogue_function_id; + +@@ -5340,7 +5133,7 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * + param_id[i] = vkd3d_spirv_build_op_load(builder, type_id, param_id[i], SpvMemoryAccessMaskNone); + } + +- if (is_control_point_phase(phase)) ++ if (is_in_control_point_phase(compiler)) + output_index_id = spirv_compiler_emit_load_invocation_id(compiler); + + for (i = 0; i < signature->element_count; ++i) +@@ -5348,14 +5141,12 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * + if (!compiler->output_info[i].id) + continue; + +- variable_idx = spirv_compiler_get_output_variable_index(compiler, +- signature->elements[i].register_index); +- if (!param_id[variable_idx]) ++ if (!param_id[i]) + continue; + + spirv_compiler_emit_store_shader_output(compiler, signature, + &signature->elements[i], &compiler->output_info[i], output_index_id, +- param_id[variable_idx], compiler->private_output_variable_write_mask[variable_idx]); ++ param_id[i], compiler->private_output_variable_write_mask[i]); + } + + vkd3d_spirv_build_op_return(&compiler->spirv_builder); +@@ -5375,28 +5166,11 @@ static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *comp + dst.reg.type = VKD3DSPR_OUTPOINTID; + dst.reg.idx[0].offset = ~0u; + dst.reg.idx[1].offset = ~0u; ++ dst.reg.idx_count = 0; + dst.write_mask = VKD3DSP_WRITEMASK_0; + spirv_compiler_emit_input_register(compiler, &dst); + } + +-static void spirv_compiler_emit_hull_shader_patch_constants(struct spirv_compiler *compiler) +-{ +- const struct vkd3d_shader_signature *signature = compiler->patch_constant_signature; +- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- uint32_t register_count = 0; +- unsigned int signature_idx; +- +- for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) +- register_count = max(register_count, signature->elements[signature_idx].register_index + 1); +- +- if (!register_count) +- return; +- +- compiler->hs.patch_constants_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, +- SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, register_count); +- vkd3d_spirv_build_op_name(builder, compiler->hs.patch_constants_id, "opc"); +-} +- + static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler) + { + const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; +@@ -5410,7 +5184,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp + case VKD3D_SHADER_TYPE_HULL: + vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationControl); + spirv_compiler_emit_hull_shader_builtins(compiler); +- spirv_compiler_emit_hull_shader_patch_constants(compiler); + break; + case VKD3D_SHADER_TYPE_DOMAIN: + vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation); +@@ -5439,8 +5212,7 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) + { + 
vkd3d_spirv_builder_begin_main_function(builder); +- +- spirv_compiler_emit_shader_signature_outputs(compiler); ++ compiler->main_block_open = true; + } + } + +@@ -5522,12 +5294,13 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil + reg.type = VKD3DSPR_IDXTEMP; + reg.idx[0].offset = temp->register_idx; + reg.idx[1].offset = ~0u; ++ reg.idx_count = 1; + + function_location = spirv_compiler_get_current_function_location(compiler); + vkd3d_spirv_begin_function_stream_insertion(builder, function_location); + + id = spirv_compiler_emit_array_variable(compiler, &builder->function_stream, +- SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, temp->register_size); ++ SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, &temp->register_size, 1); + + spirv_compiler_emit_register_debug_name(builder, id, ®); + +@@ -6097,6 +5870,7 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { + const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; ++ enum vkd3d_shader_resource_type resource_type = semantic->resource_type; + uint32_t flags = instruction->flags; + + /* We don't distinguish between APPEND and COUNTER UAVs. */ +@@ -6104,8 +5878,13 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, + if (flags) + FIXME("Unhandled UAV flags %#x.\n", flags); + ++ if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && semantic->sample_count == 1) ++ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; ++ else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && semantic->sample_count == 1) ++ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; ++ + spirv_compiler_emit_resource_declaration(compiler, &semantic->resource, +- semantic->resource_type, semantic->resource_data_type[0], 0, false); ++ resource_type, semantic->resource_data_type[0], 0, false); + } + + static void spirv_compiler_emit_dcl_resource_raw(struct spirv_compiler *compiler, +@@ -6185,10 +5964,9 @@ static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { + const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; +- const struct vkd3d_shader_phase *phase; + +- if ((phase = spirv_compiler_get_current_shader_phase(compiler))) +- spirv_compiler_emit_shader_phase_input(compiler, phase, dst); ++ if (spirv_compiler_get_current_shader_phase(compiler)) ++ spirv_compiler_emit_shader_phase_input(compiler, dst); + else if (vkd3d_shader_register_is_input(&dst->reg) || dst->reg.type == VKD3DSPR_PATCHCONST) + spirv_compiler_emit_input(compiler, dst, VKD3D_SIV_NONE, VKD3DSIM_NONE); + else +@@ -6224,7 +6002,8 @@ static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler, + { + const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; + +- if (vkd3d_shader_register_is_output(&dst->reg)) ++ if (vkd3d_shader_register_is_output(&dst->reg) ++ || (is_in_fork_or_join_phase(compiler) && vkd3d_shader_register_is_patch_constant(&dst->reg))) + spirv_compiler_emit_output(compiler, dst, VKD3D_SIV_NONE); + else + spirv_compiler_emit_output_register(compiler, dst); +@@ -6242,64 +6021,6 @@ static void spirv_compiler_emit_dcl_output_siv(struct spirv_compiler *compiler, + spirv_compiler_emit_output(compiler, dst, sysval); + } + +-static bool spirv_compiler_check_index_range(struct spirv_compiler *compiler, +- const struct 
vkd3d_shader_index_range *range) +-{ +- const struct vkd3d_shader_register *reg = &range->dst.reg; +- struct vkd3d_shader_register_info reg_info; +- struct vkd3d_shader_register current_reg; +- struct vkd3d_symbol reg_symbol; +- unsigned int i; +- uint32_t id; +- +- current_reg = *reg; +- vkd3d_symbol_make_register(®_symbol, ¤t_reg); +- if (!spirv_compiler_get_register_info(compiler, ¤t_reg, ®_info)) +- { +- ERR("Failed to get register info.\n"); +- return false; +- } +- +- /* FIXME: We should check if it's an array. */ +- if (!reg_info.is_aggregate) +- { +- FIXME("Unhandled register %#x.\n", reg->type); +- return false; +- } +- id = reg_info.id; +- +- for (i = reg->idx[0].offset; i < reg->idx[0].offset + range->register_count; ++i) +- { +- current_reg.idx[0].offset = i; +- vkd3d_symbol_make_register(®_symbol, ¤t_reg); +- +- if (range->dst.write_mask != reg_info.write_mask +- || vkd3d_write_mask_component_count(reg_info.write_mask) != 1) +- { +- FIXME("Unhandled index range write mask %#x (%#x).\n", +- range->dst.write_mask, reg_info.write_mask); +- return false; +- } +- +- if (reg_info.id != id) +- { +- FIXME("Unhandled index range %#x, %u.\n", reg->type, i); +- return false; +- } +- } +- +- return true; +-} +- +-static void spirv_compiler_emit_dcl_index_range(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- const struct vkd3d_shader_index_range *range = &instruction->declaration.index_range; +- +- if (!spirv_compiler_check_index_range(compiler, range)) +- FIXME("Ignoring dcl_index_range %#x %u.\n", range->dst.reg.type, range->register_count); +-} +- + static void spirv_compiler_emit_dcl_stream(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { +@@ -6495,157 +6216,83 @@ static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler + SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); + } + +-static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler, +- const struct vkd3d_shader_phase *phase) ++static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler); ++ ++static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) + { +- const struct vkd3d_shader_signature *signature = compiler->output_signature; ++ const struct shader_signature *signature = &compiler->output_signature; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- struct vkd3d_symbol reg_symbol, *symbol; +- struct vkd3d_shader_register reg; +- struct rb_entry *entry; +- unsigned int i; ++ ++ if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase) ++ spirv_compiler_emit_default_control_point_phase(compiler); + + vkd3d_spirv_build_op_function_end(builder); + + compiler->temp_id = 0; + compiler->temp_count = 0; + +- /* +- * vocp inputs in fork and join shader phases are outputs of the control +- * point phase. Reinsert symbols for vocp registers while leaving the +- * control point phase. 
+- */ +- if (is_control_point_phase(phase)) ++ if (is_in_control_point_phase(compiler)) + { + if (compiler->epilogue_function_id) + { +- spirv_compiler_emit_shader_phase_name(compiler, compiler->epilogue_function_id, phase, "_epilogue"); ++ spirv_compiler_emit_shader_phase_name(compiler, compiler->epilogue_function_id, "_epilogue"); + spirv_compiler_emit_shader_epilogue_function(compiler); + } + +- memset(®, 0, sizeof(reg)); +- reg.idx[1].offset = ~0u; +- + /* Fork and join phases share output registers (patch constants). + * Control point phase has separate output registers. */ + memset(compiler->output_info, 0, signature->element_count * sizeof(*compiler->output_info)); + memset(compiler->private_output_variable, 0, sizeof(compiler->private_output_variable)); + memset(compiler->private_output_variable_array_idx, 0, sizeof(compiler->private_output_variable_array_idx)); + memset(compiler->private_output_variable_write_mask, 0, sizeof(compiler->private_output_variable_write_mask)); +- +- for (i = 0; i < signature->element_count; ++i) +- { +- const struct vkd3d_shader_signature_element *e = &signature->elements[i]; +- +- reg.type = VKD3DSPR_OUTPUT; +- reg.idx[0].offset = e->register_index; +- vkd3d_symbol_make_register(®_symbol, ®); +- if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) +- { +- rb_remove(&compiler->symbol_table, entry); +- +- symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); +- +- reg.type = VKD3DSPR_OUTCONTROLPOINT; +- reg.idx[1].offset = reg.idx[0].offset; +- reg.idx[0].offset = compiler->output_control_point_count; +- vkd3d_symbol_make_register(symbol, ®); +- symbol->info.reg.is_aggregate = false; +- +- if (rb_put(&compiler->symbol_table, symbol, entry) == -1) +- { +- ERR("Failed to insert vocp symbol entry (%s).\n", debug_vkd3d_symbol(symbol)); +- vkd3d_symbol_free(entry, NULL); +- } +- } +- } +- } +- +- if (phase->instance_count) +- { +- memset(®, 0, sizeof(reg)); +- reg.type = phase->type == VKD3DSIH_HS_FORK_PHASE ? 
VKD3DSPR_FORKINSTID : VKD3DSPR_JOININSTID; +- reg.idx[0].offset = ~0u; +- reg.idx[1].offset = ~0u; +- vkd3d_symbol_make_register(®_symbol, ®); +- if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) +- { +- rb_remove(&compiler->symbol_table, entry); +- vkd3d_symbol_free(entry, NULL); +- } + } + } + + static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { +- const struct vkd3d_shader_phase *previous_phase; ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ uint32_t function_id, void_id, function_type_id; + struct vkd3d_shader_phase *phase; + +- if ((previous_phase = spirv_compiler_get_current_shader_phase(compiler))) +- spirv_compiler_leave_shader_phase(compiler, previous_phase); ++ assert(compiler->phase != instruction->handler_idx); + +- if (!vkd3d_array_reserve((void **)&compiler->shader_phases, &compiler->shader_phases_size, +- compiler->shader_phase_count + 1, sizeof(*compiler->shader_phases))) +- return; +- phase = &compiler->shader_phases[compiler->shader_phase_count]; ++ if (!is_in_default_phase(compiler)) ++ spirv_compiler_leave_shader_phase(compiler); + +- phase->type = instruction->handler_idx; +- phase->idx = compiler->shader_phase_count; +- phase->instance_count = 0; +- phase->function_id = 0; +- phase->instance_id = 0; +- phase->function_location = 0; ++ function_id = vkd3d_spirv_alloc_id(builder); + +- ++compiler->shader_phase_count; +-} +- +-static int spirv_compiler_emit_shader_phase_instance_count(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- struct vkd3d_shader_phase *phase = &compiler->shader_phases[compiler->shader_phase_count - 1]; +- +- if (!compiler->shader_phase_count +- || (phase->type != VKD3DSIH_HS_FORK_PHASE && phase->type != VKD3DSIH_HS_JOIN_PHASE) +- || phase->function_id) +- { +- WARN("Unexpected dcl_hs_{fork,join}_phase_instance_count instruction.\n"); +- return VKD3D_ERROR_INVALID_SHADER; +- } +- +- phase->instance_count = instruction->declaration.count; +- +- spirv_compiler_begin_shader_phase(compiler, phase); +- +- return VKD3D_OK; +-} ++ void_id = vkd3d_spirv_get_op_type_void(builder); ++ function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, NULL, 0); ++ vkd3d_spirv_build_op_function(builder, void_id, function_id, ++ SpvFunctionControlMaskNone, function_type_id); + +-static const struct vkd3d_shader_phase *spirv_compiler_get_control_point_phase( +- struct spirv_compiler *compiler) +-{ +- const struct vkd3d_shader_phase *phase; ++ vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); + +- if (compiler->shader_phase_count < 1) +- return NULL; ++ compiler->phase = instruction->handler_idx; ++ spirv_compiler_emit_shader_phase_name(compiler, function_id, NULL); + +- phase = &compiler->shader_phases[0]; +- if (is_control_point_phase(phase)) +- return phase; ++ phase = (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) ++ ? 
&compiler->control_point_phase : &compiler->patch_constant_phase; ++ phase->function_id = function_id; ++ phase->function_location = vkd3d_spirv_stream_current_location(&builder->function_stream); + +- return NULL; ++ if (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) ++ compiler->emit_default_control_point_phase = instruction->flags; + } + + static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler) + { +- const struct vkd3d_shader_signature *output_signature = compiler->output_signature; +- const struct vkd3d_shader_signature *input_signature = compiler->input_signature; ++ const struct shader_signature *output_signature = &compiler->output_signature; ++ const struct shader_signature *input_signature = &compiler->input_signature; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + enum vkd3d_shader_component_type component_type; +- uint32_t input_id, output_id, dst_id, src_id; + struct vkd3d_shader_src_param invocation; + struct vkd3d_shader_register input_reg; + uint32_t type_id, output_ptr_type_id; ++ uint32_t input_id, output_id, dst_id; + unsigned int component_count; ++ unsigned int array_sizes[2]; + uint32_t invocation_id; + unsigned int i; + +@@ -6657,6 +6304,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile + invocation.reg.idx[0].offset = ~0u; + invocation.reg.idx[1].offset = ~0u; + invocation.reg.idx[2].offset = ~0u; ++ invocation.reg.idx_count = 0; + invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE; + + memset(&input_reg, 0, sizeof(input_reg)); +@@ -6664,37 +6312,42 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile + input_reg.data_type = VKD3D_DATA_FLOAT; + input_reg.idx[0].rel_addr = &invocation; + input_reg.idx[2].offset = ~0u; ++ input_reg.idx_count = 2; + input_id = spirv_compiler_get_register_id(compiler, &input_reg); + + assert(input_signature->element_count == output_signature->element_count); + for (i = 0; i < output_signature->element_count; ++i) + { +- const struct vkd3d_shader_signature_element *output = &output_signature->elements[i]; +- const struct vkd3d_shader_signature_element *input = &input_signature->elements[i]; ++ const struct signature_element *output = &output_signature->elements[i]; ++ const struct signature_element *input = &input_signature->elements[i]; + + assert(input->mask == output->mask); + assert(input->component_type == output->component_type); + +- input_reg.idx[1].offset = input->register_index; ++ input_reg.idx[1].offset = i; + input_id = spirv_compiler_get_register_id(compiler, &input_reg); +- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); +- src_id = vkd3d_spirv_build_op_load(builder, type_id, input_id, SpvMemoryAccessMaskNone); + + component_type = output->component_type; + component_count = vkd3d_write_mask_component_count(output->mask); +- output_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, +- SpvStorageClassOutput, component_type, component_count, compiler->output_control_point_count); ++ type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); ++ if ((array_sizes[0] = (input->register_count > 1) ? 
input->register_count : 0)) ++ type_id = vkd3d_spirv_get_op_type_array(builder, type_id, spirv_compiler_get_constant_uint(compiler, ++ array_sizes[0])); ++ ++ array_sizes[1] = compiler->output_control_point_count; ++ output_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, SpvStorageClassOutput, ++ component_type, component_count, array_sizes, 2); + vkd3d_spirv_add_iface_variable(builder, output_id); + vkd3d_spirv_build_op_decorate1(builder, output_id, SpvDecorationLocation, output->register_index); + vkd3d_spirv_build_op_name(builder, output_id, "vocp%u", output->register_index); + +- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); + dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_id, invocation_id); + +- spirv_compiler_emit_store(compiler, dst_id, output->mask, +- component_type, SpvStorageClassOutput, VKD3DSP_WRITEMASK_ALL, src_id); ++ vkd3d_spirv_build_op_copy_memory(builder, dst_id, input_id, SpvMemoryAccessMaskNone); + } ++ ++ vkd3d_spirv_build_op_return(builder); + } + + static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, +@@ -6723,95 +6376,6 @@ static void spirv_compiler_emit_hull_shader_barrier(struct spirv_compiler *compi + SpvScopeWorkgroup, SpvScopeInvocation, SpvMemorySemanticsMaskNone); + } + +-static void spirv_compiler_emit_hull_shader_input_initialisation(struct spirv_compiler *compiler) +-{ +- uint32_t type_id, length_id, register_index_id, src_array_id, dst_array_id, vicp_id, tmp_id; +- const struct vkd3d_shader_signature *signature = compiler->input_signature; +- uint32_t src_type_id, dst_type_id, src_id, dst_id, point_index_id; +- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_signature_element *element; +- enum vkd3d_shader_input_sysval_semantic sysval; +- const struct vkd3d_spirv_builtin *builtin; +- struct vkd3d_symbol *symbol, symbol_key; +- unsigned int register_count, i, j; +- struct vkd3d_shader_register r; +- struct rb_entry *entry; +- uint32_t indices[2]; +- +- for (i = 0, register_count = 0; i < signature->element_count; ++i) +- { +- register_count = max(register_count, signature->elements[i].register_index + 1); +- } +- +- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); +- length_id = spirv_compiler_get_constant_uint(compiler, compiler->input_control_point_count); +- type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); +- type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); +- +- memset(&r, 0, sizeof(r)); +- r.type = VKD3DSPR_INPUT; +- r.idx[0].offset = 0; +- r.idx[1].offset = ~0u; +- vkd3d_symbol_make_register(&symbol_key, &r); +- +- for (i = 0; i < signature->element_count; ++i) +- { +- element = &signature->elements[i]; +- +- symbol_key.key.reg.idx = element->register_index; +- entry = rb_get(&compiler->symbol_table, &symbol_key); +- symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); +- +- vicp_id = symbol->id; +- register_index_id = spirv_compiler_get_constant_uint(compiler, element->register_index); +- dst_array_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, type_id, vicp_id, register_index_id); +- +- if (element->sysval_semantic) +- { +- sysval = vkd3d_siv_from_sysval(element->sysval_semantic); +- builtin = get_spirv_builtin_for_sysval(compiler, sysval); +- src_array_id = 
spirv_compiler_emit_builtin_variable(compiler, builtin, +- SpvStorageClassInput, compiler->input_control_point_count); +- +- if (builtin->component_count == 4) +- { +- vkd3d_spirv_build_op_copy_memory(builder, dst_array_id, src_array_id, SpvMemoryAccessMaskNone); +- } +- else +- { +- tmp_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, builtin->component_count); +- src_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, tmp_id); +- dst_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, tmp_id); +- +- for (j = 0; j < compiler->input_control_point_count; ++j) +- { +- point_index_id = spirv_compiler_get_constant_uint(compiler, j); +- src_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, +- src_type_id, src_array_id, point_index_id); +- +- indices[0] = point_index_id; +- indices[1] = spirv_compiler_get_constant_uint(compiler, 0); +- dst_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, +- dst_type_id, dst_array_id, indices, 2); +- +- vkd3d_spirv_build_op_copy_memory(builder, dst_id, src_id, SpvMemoryAccessMaskNone); +- } +- } +- } +- else +- { +- src_array_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, +- SpvStorageClassInput, VKD3D_SHADER_COMPONENT_FLOAT, 4, compiler->input_control_point_count); +- vkd3d_spirv_add_iface_variable(builder, src_array_id); +- vkd3d_spirv_build_op_decorate1(builder, src_array_id, SpvDecorationLocation, element->register_index); +- vkd3d_spirv_build_op_name(builder, src_array_id, "v%u", element->register_index); +- +- vkd3d_spirv_build_op_copy_memory(builder, dst_array_id, src_array_id, SpvMemoryAccessMaskNone); +- } +- symbol->info.reg.dcl_mask |= element->mask; +- } +-} +- + static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler *compiler) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +@@ -6854,46 +6418,21 @@ static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler + static void spirv_compiler_emit_hull_shader_main(struct spirv_compiler *compiler) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_phase *control_point_phase, *phase; +- uint32_t phase_instance_id; +- unsigned int i, j; + uint32_t void_id; + + vkd3d_spirv_builder_begin_main_function(builder); + +- spirv_compiler_emit_hull_shader_input_initialisation(compiler); +- + void_id = vkd3d_spirv_get_op_type_void(builder); + +- if ((control_point_phase = spirv_compiler_get_control_point_phase(compiler))) +- vkd3d_spirv_build_op_function_call(builder, void_id, control_point_phase->function_id, NULL, 0); +- else +- spirv_compiler_emit_default_control_point_phase(compiler); ++ vkd3d_spirv_build_op_function_call(builder, void_id, compiler->control_point_phase.function_id, NULL, 0); + + if (compiler->use_vocp) + spirv_compiler_emit_hull_shader_barrier(compiler); + +- for (i = 0; i < compiler->shader_phase_count; ++i) +- { +- phase = &compiler->shader_phases[i]; +- if (is_control_point_phase(phase)) +- continue; +- +- if (phase->instance_count) +- { +- for (j = 0; j < phase->instance_count; ++j) +- { +- phase_instance_id = spirv_compiler_get_constant_uint(compiler, j); +- vkd3d_spirv_build_op_function_call(builder, +- void_id, phase->function_id, &phase_instance_id, 1); +- } +- } +- else +- { +- vkd3d_spirv_build_op_function_call(builder, void_id, phase->function_id, NULL, 0); +- } +- } +- ++ /* TODO: only call the patch constant function for invocation 0. 
The simplest way ++ * is to avoid use of private variables there, otherwise we would need a separate ++ * patch constant epilogue also only called from invocation 0. */ ++ vkd3d_spirv_build_op_function_call(builder, void_id, compiler->patch_constant_phase.function_id, NULL, 0); + spirv_compiler_emit_shader_epilogue_invocation(compiler); + vkd3d_spirv_build_op_return(builder); + vkd3d_spirv_build_op_function_end(builder); +@@ -7575,10 +7114,10 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co + static void spirv_compiler_emit_return(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { +- const struct vkd3d_shader_phase *phase = spirv_compiler_get_current_shader_phase(compiler); + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + +- if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (!phase || is_control_point_phase(phase))) ++ if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (is_in_default_phase(compiler) ++ || is_in_control_point_phase(compiler))) + spirv_compiler_emit_shader_epilogue_invocation(compiler); + + vkd3d_spirv_build_op_return(builder); +@@ -7972,12 +7511,15 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c + + if (cf_info) + cf_info->inside_block = false; ++ else ++ compiler->main_block_open = false; + break; + + case VKD3DSIH_RETP: + spirv_compiler_emit_retc(compiler, instruction); + break; + ++ case VKD3DSIH_DISCARD: + case VKD3DSIH_TEXKILL: + spirv_compiler_emit_kill(compiler, instruction); + break; +@@ -8256,7 +7798,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, + image_operands[image_operand_count++] = spirv_compiler_emit_texel_offset(compiler, + instruction, image.resource_type_info); + } +- if (multisample) ++ if (multisample && image.resource_type_info->ms) + { + operands_mask |= SpvImageOperandsSampleMask; + image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, +@@ -9521,58 +9063,6 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, + vkd3d_spirv_build_op_end_primitive(builder); + } + +-static void spirv_compiler_emit_hull_shader_inputs(struct spirv_compiler *compiler) +-{ +- const struct vkd3d_shader_signature *signature = compiler->input_signature; +- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- uint32_t type_id, length_id, vicp_id, vicp_type_id; +- unsigned int register_count, register_idx, i; +- struct vkd3d_shader_register r; +- struct vkd3d_symbol symbol; +- struct rb_entry *entry; +- +- for (i = 0, register_count = 0; i < signature->element_count; ++i) +- { +- register_count = max(register_count, signature->elements[i].register_index + 1); +- } +- +- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); +- length_id = spirv_compiler_get_constant_uint(compiler, compiler->input_control_point_count); +- type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); +- length_id = spirv_compiler_get_constant_uint(compiler, register_count); +- type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); +- vicp_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); +- +- vicp_id = vkd3d_spirv_build_op_variable(builder, +- &builder->global_stream, vicp_type_id, SpvStorageClassPrivate, 0); +- vkd3d_spirv_build_op_name(builder, vicp_id, "vicp"); +- +- memset(&r, 0, sizeof(r)); +- r.type = VKD3DSPR_INPUT; +- r.idx[0].offset = 0; +- r.idx[1].offset = ~0u; +- 
vkd3d_symbol_make_register(&symbol, &r); +- +- for (i = 0; i < signature->element_count; ++i) +- { +- register_idx = signature->elements[i].register_index; +- +- symbol.key.reg.idx = register_idx; +- if ((entry = rb_get(&compiler->symbol_table, &symbol))) +- { +- struct vkd3d_symbol *s = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); +- s->info.reg.dcl_mask |= signature->elements[i].mask; +- continue; +- } +- +- vkd3d_symbol_set_register_info(&symbol, vicp_id, SpvStorageClassPrivate, +- VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); +- symbol.info.reg.dcl_mask = signature->elements[i].mask; +- symbol.info.reg.is_aggregate = true; +- spirv_compiler_put_symbol(compiler, &symbol); +- } +-} +- + /* This function is called after declarations are processed. */ + static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) + { +@@ -9581,8 +9071,6 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) + if (compiler->xfb_info && compiler->xfb_info->element_count + && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) + spirv_compiler_emit_point_size(compiler); +- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL) +- spirv_compiler_emit_hull_shader_inputs(compiler); + } + + static bool is_dcl_instruction(enum vkd3d_shader_opcode handler_idx) +@@ -9660,9 +9148,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_DCL_OUTPUT_SIV: + spirv_compiler_emit_dcl_output_siv(compiler, instruction); + break; +- case VKD3DSIH_DCL_INDEX_RANGE: +- spirv_compiler_emit_dcl_index_range(compiler, instruction); +- break; + case VKD3DSIH_DCL_STREAM: + spirv_compiler_emit_dcl_stream(compiler, instruction); + break; +@@ -9699,10 +9184,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_DCL_THREAD_GROUP: + spirv_compiler_emit_dcl_thread_group(compiler, instruction); + break; +- case VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT: +- case VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: +- ret = spirv_compiler_emit_shader_phase_instance_count(compiler, instruction); +- break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +@@ -9826,6 +9307,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_CONTINUE: + case VKD3DSIH_CONTINUEP: + case VKD3DSIH_DEFAULT: ++ case VKD3DSIH_DISCARD: + case VKD3DSIH_ELSE: + case VKD3DSIH_ENDIF: + case VKD3DSIH_ENDLOOP: +@@ -9947,28 +9429,55 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + return ret; + } + +-int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, ++static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, + struct vkd3d_shader_code *spirv) + { +- const struct vkd3d_shader_instruction_array *instructions = &parser->instructions; + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_phase *phase; ++ struct vkd3d_shader_instruction_array instructions; + enum vkd3d_result result = VKD3D_OK; + unsigned int i; + + compiler->location.column = 0; +- for (i = 0; i < instructions->count; ++i) ++ compiler->location.line = 1; ++ ++ instructions = parser->instructions; ++ memset(&parser->instructions, 0, 
sizeof(parser->instructions)); ++ ++ if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL ++ && (result = instruction_array_flatten_hull_shader_phases(&instructions)) >= 0) ++ { ++ result = instruction_array_normalise_hull_shader_control_point_io(&instructions, ++ &parser->shader_desc.input_signature); ++ } ++ if (result >= 0) ++ result = instruction_array_normalise_io_registers(&instructions, parser->shader_version.type, ++ &compiler->input_signature, &compiler->output_signature, &compiler->patch_constant_signature); ++ ++ if (result >= 0 && TRACE_ON()) ++ vkd3d_shader_trace(&instructions, &parser->shader_version); ++ ++ if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) ++ spirv_compiler_emit_shader_signature_outputs(compiler); ++ ++ for (i = 0; i < instructions.count && result >= 0; ++i) + { + compiler->location.line = i + 1; +- if ((result = spirv_compiler_handle_instruction(compiler, &instructions->elements[i])) < 0) +- return result; ++ result = spirv_compiler_handle_instruction(compiler, &instructions.elements[i]); + } + +- if ((phase = spirv_compiler_get_current_shader_phase(compiler))) +- spirv_compiler_leave_shader_phase(compiler, phase); ++ shader_instruction_array_destroy(&instructions); ++ ++ if (result < 0) ++ return result; ++ ++ if (compiler->main_block_open) ++ vkd3d_spirv_build_op_return(builder); ++ ++ if (!is_in_default_phase(compiler)) ++ spirv_compiler_leave_shader_phase(compiler); + else + vkd3d_spirv_build_op_function_end(builder); + +@@ -10023,23 +9532,23 @@ int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + return VKD3D_OK; + } + +-void spirv_compiler_destroy(struct spirv_compiler *compiler) ++int spirv_compile(struct vkd3d_shader_parser *parser, ++ const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, ++ const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { +- vkd3d_free(compiler->control_flow_info); +- +- vkd3d_free(compiler->output_info); +- +- vkd3d_free(compiler->push_constants); +- vkd3d_free(compiler->descriptor_offset_ids); +- +- vkd3d_spirv_builder_free(&compiler->spirv_builder); +- +- rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL); ++ struct spirv_compiler *spirv_compiler; ++ int ret; + +- vkd3d_free(compiler->shader_phases); +- vkd3d_free(compiler->spec_constants); ++ if (!(spirv_compiler = spirv_compiler_create(&parser->shader_version, &parser->shader_desc, ++ compile_info, scan_descriptor_info, message_context, &parser->location))) ++ { ++ ERR("Failed to create SPIR-V compiler.\n"); ++ return VKD3D_ERROR; ++ } + +- vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); ++ ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); + +- vkd3d_free(compiler); ++ spirv_compiler_destroy(spirv_compiler); ++ return ret; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +new file mode 100644 +index 00000000000..2166eb41ac2 +--- /dev/null ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -0,0 +1,5219 @@ ++/* ++ * TPF (Direct3D shader models 4 and 5 bytecode) support ++ * ++ * Copyright 2008-2009 Henri Verbeet for CodeWeavers ++ * Copyright 2010 Rico Schüller ++ * Copyright 2017 Józef Kucia for CodeWeavers ++ * Copyright 2019-2020 Zebediah Figura for CodeWeavers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either 
++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA ++ */ ++ ++#include "hlsl.h" ++ ++#define SM4_MAX_SRC_COUNT 6 ++#define SM4_MAX_DST_COUNT 2 ++ ++STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); ++ ++#define VKD3D_SM4_PS 0x0000u ++#define VKD3D_SM4_VS 0x0001u ++#define VKD3D_SM4_GS 0x0002u ++#define VKD3D_SM5_HS 0x0003u ++#define VKD3D_SM5_DS 0x0004u ++#define VKD3D_SM5_CS 0x0005u ++#define VKD3D_SM4_LIB 0xfff0u ++ ++#define VKD3D_SM4_INSTRUCTION_MODIFIER (0x1u << 31) ++ ++#define VKD3D_SM4_MODIFIER_MASK 0x3fu ++ ++#define VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT 6 ++#define VKD3D_SM5_MODIFIER_DATA_TYPE_MASK (0xffffu << VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT) ++ ++#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT 6 ++#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT) ++ ++#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT 11 ++#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK (0xfffu << VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT) ++ ++#define VKD3D_SM4_AOFFIMMI_U_SHIFT 9 ++#define VKD3D_SM4_AOFFIMMI_U_MASK (0xfu << VKD3D_SM4_AOFFIMMI_U_SHIFT) ++#define VKD3D_SM4_AOFFIMMI_V_SHIFT 13 ++#define VKD3D_SM4_AOFFIMMI_V_MASK (0xfu << VKD3D_SM4_AOFFIMMI_V_SHIFT) ++#define VKD3D_SM4_AOFFIMMI_W_SHIFT 17 ++#define VKD3D_SM4_AOFFIMMI_W_MASK (0xfu << VKD3D_SM4_AOFFIMMI_W_SHIFT) ++ ++#define VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT 24 ++#define VKD3D_SM4_INSTRUCTION_LENGTH_MASK (0x1fu << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT) ++ ++#define VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT 11 ++#define VKD3D_SM4_INSTRUCTION_FLAGS_MASK (0x7u << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT) ++ ++#define VKD3D_SM4_RESOURCE_TYPE_SHIFT 11 ++#define VKD3D_SM4_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM4_RESOURCE_TYPE_SHIFT) ++ ++#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT 16 ++#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK (0xfu << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT) ++ ++#define VKD3D_SM4_PRIMITIVE_TYPE_SHIFT 11 ++#define VKD3D_SM4_PRIMITIVE_TYPE_MASK (0x3fu << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT) ++ ++#define VKD3D_SM4_INDEX_TYPE_SHIFT 11 ++#define VKD3D_SM4_INDEX_TYPE_MASK (0x1u << VKD3D_SM4_INDEX_TYPE_SHIFT) ++ ++#define VKD3D_SM4_SAMPLER_MODE_SHIFT 11 ++#define VKD3D_SM4_SAMPLER_MODE_MASK (0xfu << VKD3D_SM4_SAMPLER_MODE_SHIFT) ++ ++#define VKD3D_SM4_SHADER_DATA_TYPE_SHIFT 11 ++#define VKD3D_SM4_SHADER_DATA_TYPE_MASK (0xfu << VKD3D_SM4_SHADER_DATA_TYPE_SHIFT) ++ ++#define VKD3D_SM4_INTERPOLATION_MODE_SHIFT 11 ++#define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << VKD3D_SM4_INTERPOLATION_MODE_SHIFT) ++ ++#define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 ++#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) ++ ++#define VKD3D_SM5_PRECISE_SHIFT 19 ++#define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) ++ ++#define VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT 11 ++#define VKD3D_SM5_CONTROL_POINT_COUNT_MASK (0xffu << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT) ++ ++#define VKD3D_SM5_FP_ARRAY_SIZE_SHIFT 16 ++#define VKD3D_SM5_FP_TABLE_COUNT_MASK 0xffffu ++ ++#define VKD3D_SM5_UAV_FLAGS_SHIFT 15 ++#define 
VKD3D_SM5_UAV_FLAGS_MASK (0x1ffu << VKD3D_SM5_UAV_FLAGS_SHIFT) ++ ++#define VKD3D_SM5_SYNC_FLAGS_SHIFT 11 ++#define VKD3D_SM5_SYNC_FLAGS_MASK (0xffu << VKD3D_SM5_SYNC_FLAGS_SHIFT) ++ ++#define VKD3D_SM5_TESSELLATOR_SHIFT 11 ++#define VKD3D_SM5_TESSELLATOR_MASK (0xfu << VKD3D_SM5_TESSELLATOR_SHIFT) ++ ++#define VKD3D_SM4_OPCODE_MASK 0xff ++ ++#define VKD3D_SM4_EXTENDED_OPERAND (0x1u << 31) ++ ++#define VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK 0x3fu ++ ++#define VKD3D_SM4_REGISTER_MODIFIER_SHIFT 6 ++#define VKD3D_SM4_REGISTER_MODIFIER_MASK (0xffu << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) ++ ++#define VKD3D_SM4_REGISTER_PRECISION_SHIFT 14 ++#define VKD3D_SM4_REGISTER_PRECISION_MASK (0x7u << VKD3D_SM4_REGISTER_PRECISION_SHIFT) ++ ++#define VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT 17 ++#define VKD3D_SM4_REGISTER_NON_UNIFORM_MASK (0x1u << VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT) ++ ++#define VKD3D_SM4_ADDRESSING_SHIFT2 28 ++#define VKD3D_SM4_ADDRESSING_MASK2 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT2) ++ ++#define VKD3D_SM4_ADDRESSING_SHIFT1 25 ++#define VKD3D_SM4_ADDRESSING_MASK1 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT1) ++ ++#define VKD3D_SM4_ADDRESSING_SHIFT0 22 ++#define VKD3D_SM4_ADDRESSING_MASK0 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT0) ++ ++#define VKD3D_SM4_REGISTER_ORDER_SHIFT 20 ++#define VKD3D_SM4_REGISTER_ORDER_MASK (0x3u << VKD3D_SM4_REGISTER_ORDER_SHIFT) ++ ++#define VKD3D_SM4_REGISTER_TYPE_SHIFT 12 ++#define VKD3D_SM4_REGISTER_TYPE_MASK (0xffu << VKD3D_SM4_REGISTER_TYPE_SHIFT) ++ ++#define VKD3D_SM4_SWIZZLE_TYPE_SHIFT 2 ++#define VKD3D_SM4_SWIZZLE_TYPE_MASK (0x3u << VKD3D_SM4_SWIZZLE_TYPE_SHIFT) ++ ++#define VKD3D_SM4_DIMENSION_SHIFT 0 ++#define VKD3D_SM4_DIMENSION_MASK (0x3u << VKD3D_SM4_DIMENSION_SHIFT) ++ ++#define VKD3D_SM4_WRITEMASK_SHIFT 4 ++#define VKD3D_SM4_WRITEMASK_MASK (0xfu << VKD3D_SM4_WRITEMASK_SHIFT) ++ ++#define VKD3D_SM4_SWIZZLE_SHIFT 4 ++#define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT) ++ ++#define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) ++#define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf) ++ ++#define VKD3D_SM4_ADDRESSING_RELATIVE 0x2 ++#define VKD3D_SM4_ADDRESSING_OFFSET 0x1 ++ ++#define VKD3D_SM4_INSTRUCTION_FLAG_SATURATE 0x4 ++ ++#define VKD3D_SM4_CONDITIONAL_NZ (0x1u << 18) ++ ++#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) ++ ++/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. 
*/ ++#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 ++ ++enum vkd3d_sm4_opcode ++{ ++ VKD3D_SM4_OP_ADD = 0x00, ++ VKD3D_SM4_OP_AND = 0x01, ++ VKD3D_SM4_OP_BREAK = 0x02, ++ VKD3D_SM4_OP_BREAKC = 0x03, ++ VKD3D_SM4_OP_CASE = 0x06, ++ VKD3D_SM4_OP_CONTINUE = 0x07, ++ VKD3D_SM4_OP_CONTINUEC = 0x08, ++ VKD3D_SM4_OP_CUT = 0x09, ++ VKD3D_SM4_OP_DEFAULT = 0x0a, ++ VKD3D_SM4_OP_DERIV_RTX = 0x0b, ++ VKD3D_SM4_OP_DERIV_RTY = 0x0c, ++ VKD3D_SM4_OP_DISCARD = 0x0d, ++ VKD3D_SM4_OP_DIV = 0x0e, ++ VKD3D_SM4_OP_DP2 = 0x0f, ++ VKD3D_SM4_OP_DP3 = 0x10, ++ VKD3D_SM4_OP_DP4 = 0x11, ++ VKD3D_SM4_OP_ELSE = 0x12, ++ VKD3D_SM4_OP_EMIT = 0x13, ++ VKD3D_SM4_OP_ENDIF = 0x15, ++ VKD3D_SM4_OP_ENDLOOP = 0x16, ++ VKD3D_SM4_OP_ENDSWITCH = 0x17, ++ VKD3D_SM4_OP_EQ = 0x18, ++ VKD3D_SM4_OP_EXP = 0x19, ++ VKD3D_SM4_OP_FRC = 0x1a, ++ VKD3D_SM4_OP_FTOI = 0x1b, ++ VKD3D_SM4_OP_FTOU = 0x1c, ++ VKD3D_SM4_OP_GE = 0x1d, ++ VKD3D_SM4_OP_IADD = 0x1e, ++ VKD3D_SM4_OP_IF = 0x1f, ++ VKD3D_SM4_OP_IEQ = 0x20, ++ VKD3D_SM4_OP_IGE = 0x21, ++ VKD3D_SM4_OP_ILT = 0x22, ++ VKD3D_SM4_OP_IMAD = 0x23, ++ VKD3D_SM4_OP_IMAX = 0x24, ++ VKD3D_SM4_OP_IMIN = 0x25, ++ VKD3D_SM4_OP_IMUL = 0x26, ++ VKD3D_SM4_OP_INE = 0x27, ++ VKD3D_SM4_OP_INEG = 0x28, ++ VKD3D_SM4_OP_ISHL = 0x29, ++ VKD3D_SM4_OP_ISHR = 0x2a, ++ VKD3D_SM4_OP_ITOF = 0x2b, ++ VKD3D_SM4_OP_LABEL = 0x2c, ++ VKD3D_SM4_OP_LD = 0x2d, ++ VKD3D_SM4_OP_LD2DMS = 0x2e, ++ VKD3D_SM4_OP_LOG = 0x2f, ++ VKD3D_SM4_OP_LOOP = 0x30, ++ VKD3D_SM4_OP_LT = 0x31, ++ VKD3D_SM4_OP_MAD = 0x32, ++ VKD3D_SM4_OP_MIN = 0x33, ++ VKD3D_SM4_OP_MAX = 0x34, ++ VKD3D_SM4_OP_SHADER_DATA = 0x35, ++ VKD3D_SM4_OP_MOV = 0x36, ++ VKD3D_SM4_OP_MOVC = 0x37, ++ VKD3D_SM4_OP_MUL = 0x38, ++ VKD3D_SM4_OP_NE = 0x39, ++ VKD3D_SM4_OP_NOP = 0x3a, ++ VKD3D_SM4_OP_NOT = 0x3b, ++ VKD3D_SM4_OP_OR = 0x3c, ++ VKD3D_SM4_OP_RESINFO = 0x3d, ++ VKD3D_SM4_OP_RET = 0x3e, ++ VKD3D_SM4_OP_RETC = 0x3f, ++ VKD3D_SM4_OP_ROUND_NE = 0x40, ++ VKD3D_SM4_OP_ROUND_NI = 0x41, ++ VKD3D_SM4_OP_ROUND_PI = 0x42, ++ VKD3D_SM4_OP_ROUND_Z = 0x43, ++ VKD3D_SM4_OP_RSQ = 0x44, ++ VKD3D_SM4_OP_SAMPLE = 0x45, ++ VKD3D_SM4_OP_SAMPLE_C = 0x46, ++ VKD3D_SM4_OP_SAMPLE_C_LZ = 0x47, ++ VKD3D_SM4_OP_SAMPLE_LOD = 0x48, ++ VKD3D_SM4_OP_SAMPLE_GRAD = 0x49, ++ VKD3D_SM4_OP_SAMPLE_B = 0x4a, ++ VKD3D_SM4_OP_SQRT = 0x4b, ++ VKD3D_SM4_OP_SWITCH = 0x4c, ++ VKD3D_SM4_OP_SINCOS = 0x4d, ++ VKD3D_SM4_OP_UDIV = 0x4e, ++ VKD3D_SM4_OP_ULT = 0x4f, ++ VKD3D_SM4_OP_UGE = 0x50, ++ VKD3D_SM4_OP_UMUL = 0x51, ++ VKD3D_SM4_OP_UMAX = 0x53, ++ VKD3D_SM4_OP_UMIN = 0x54, ++ VKD3D_SM4_OP_USHR = 0x55, ++ VKD3D_SM4_OP_UTOF = 0x56, ++ VKD3D_SM4_OP_XOR = 0x57, ++ VKD3D_SM4_OP_DCL_RESOURCE = 0x58, ++ VKD3D_SM4_OP_DCL_CONSTANT_BUFFER = 0x59, ++ VKD3D_SM4_OP_DCL_SAMPLER = 0x5a, ++ VKD3D_SM4_OP_DCL_INDEX_RANGE = 0x5b, ++ VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY = 0x5c, ++ VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE = 0x5d, ++ VKD3D_SM4_OP_DCL_VERTICES_OUT = 0x5e, ++ VKD3D_SM4_OP_DCL_INPUT = 0x5f, ++ VKD3D_SM4_OP_DCL_INPUT_SGV = 0x60, ++ VKD3D_SM4_OP_DCL_INPUT_SIV = 0x61, ++ VKD3D_SM4_OP_DCL_INPUT_PS = 0x62, ++ VKD3D_SM4_OP_DCL_INPUT_PS_SGV = 0x63, ++ VKD3D_SM4_OP_DCL_INPUT_PS_SIV = 0x64, ++ VKD3D_SM4_OP_DCL_OUTPUT = 0x65, ++ VKD3D_SM4_OP_DCL_OUTPUT_SIV = 0x67, ++ VKD3D_SM4_OP_DCL_TEMPS = 0x68, ++ VKD3D_SM4_OP_DCL_INDEXABLE_TEMP = 0x69, ++ VKD3D_SM4_OP_DCL_GLOBAL_FLAGS = 0x6a, ++ VKD3D_SM4_OP_LOD = 0x6c, ++ VKD3D_SM4_OP_GATHER4 = 0x6d, ++ VKD3D_SM4_OP_SAMPLE_POS = 0x6e, ++ VKD3D_SM4_OP_SAMPLE_INFO = 0x6f, ++ VKD3D_SM5_OP_HS_DECLS = 0x71, ++ VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE = 0x72, ++ VKD3D_SM5_OP_HS_FORK_PHASE = 0x73, ++ 
VKD3D_SM5_OP_HS_JOIN_PHASE = 0x74, ++ VKD3D_SM5_OP_EMIT_STREAM = 0x75, ++ VKD3D_SM5_OP_CUT_STREAM = 0x76, ++ VKD3D_SM5_OP_FCALL = 0x78, ++ VKD3D_SM5_OP_BUFINFO = 0x79, ++ VKD3D_SM5_OP_DERIV_RTX_COARSE = 0x7a, ++ VKD3D_SM5_OP_DERIV_RTX_FINE = 0x7b, ++ VKD3D_SM5_OP_DERIV_RTY_COARSE = 0x7c, ++ VKD3D_SM5_OP_DERIV_RTY_FINE = 0x7d, ++ VKD3D_SM5_OP_GATHER4_C = 0x7e, ++ VKD3D_SM5_OP_GATHER4_PO = 0x7f, ++ VKD3D_SM5_OP_GATHER4_PO_C = 0x80, ++ VKD3D_SM5_OP_RCP = 0x81, ++ VKD3D_SM5_OP_F32TOF16 = 0x82, ++ VKD3D_SM5_OP_F16TOF32 = 0x83, ++ VKD3D_SM5_OP_COUNTBITS = 0x86, ++ VKD3D_SM5_OP_FIRSTBIT_HI = 0x87, ++ VKD3D_SM5_OP_FIRSTBIT_LO = 0x88, ++ VKD3D_SM5_OP_FIRSTBIT_SHI = 0x89, ++ VKD3D_SM5_OP_UBFE = 0x8a, ++ VKD3D_SM5_OP_IBFE = 0x8b, ++ VKD3D_SM5_OP_BFI = 0x8c, ++ VKD3D_SM5_OP_BFREV = 0x8d, ++ VKD3D_SM5_OP_SWAPC = 0x8e, ++ VKD3D_SM5_OP_DCL_STREAM = 0x8f, ++ VKD3D_SM5_OP_DCL_FUNCTION_BODY = 0x90, ++ VKD3D_SM5_OP_DCL_FUNCTION_TABLE = 0x91, ++ VKD3D_SM5_OP_DCL_INTERFACE = 0x92, ++ VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT = 0x93, ++ VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT = 0x94, ++ VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN = 0x95, ++ VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING = 0x96, ++ VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE = 0x97, ++ VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR = 0x98, ++ VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT = 0x99, ++ VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT = 0x9a, ++ VKD3D_SM5_OP_DCL_THREAD_GROUP = 0x9b, ++ VKD3D_SM5_OP_DCL_UAV_TYPED = 0x9c, ++ VKD3D_SM5_OP_DCL_UAV_RAW = 0x9d, ++ VKD3D_SM5_OP_DCL_UAV_STRUCTURED = 0x9e, ++ VKD3D_SM5_OP_DCL_TGSM_RAW = 0x9f, ++ VKD3D_SM5_OP_DCL_TGSM_STRUCTURED = 0xa0, ++ VKD3D_SM5_OP_DCL_RESOURCE_RAW = 0xa1, ++ VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED = 0xa2, ++ VKD3D_SM5_OP_LD_UAV_TYPED = 0xa3, ++ VKD3D_SM5_OP_STORE_UAV_TYPED = 0xa4, ++ VKD3D_SM5_OP_LD_RAW = 0xa5, ++ VKD3D_SM5_OP_STORE_RAW = 0xa6, ++ VKD3D_SM5_OP_LD_STRUCTURED = 0xa7, ++ VKD3D_SM5_OP_STORE_STRUCTURED = 0xa8, ++ VKD3D_SM5_OP_ATOMIC_AND = 0xa9, ++ VKD3D_SM5_OP_ATOMIC_OR = 0xaa, ++ VKD3D_SM5_OP_ATOMIC_XOR = 0xab, ++ VKD3D_SM5_OP_ATOMIC_CMP_STORE = 0xac, ++ VKD3D_SM5_OP_ATOMIC_IADD = 0xad, ++ VKD3D_SM5_OP_ATOMIC_IMAX = 0xae, ++ VKD3D_SM5_OP_ATOMIC_IMIN = 0xaf, ++ VKD3D_SM5_OP_ATOMIC_UMAX = 0xb0, ++ VKD3D_SM5_OP_ATOMIC_UMIN = 0xb1, ++ VKD3D_SM5_OP_IMM_ATOMIC_ALLOC = 0xb2, ++ VKD3D_SM5_OP_IMM_ATOMIC_CONSUME = 0xb3, ++ VKD3D_SM5_OP_IMM_ATOMIC_IADD = 0xb4, ++ VKD3D_SM5_OP_IMM_ATOMIC_AND = 0xb5, ++ VKD3D_SM5_OP_IMM_ATOMIC_OR = 0xb6, ++ VKD3D_SM5_OP_IMM_ATOMIC_XOR = 0xb7, ++ VKD3D_SM5_OP_IMM_ATOMIC_EXCH = 0xb8, ++ VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH = 0xb9, ++ VKD3D_SM5_OP_IMM_ATOMIC_IMAX = 0xba, ++ VKD3D_SM5_OP_IMM_ATOMIC_IMIN = 0xbb, ++ VKD3D_SM5_OP_IMM_ATOMIC_UMAX = 0xbc, ++ VKD3D_SM5_OP_IMM_ATOMIC_UMIN = 0xbd, ++ VKD3D_SM5_OP_SYNC = 0xbe, ++ VKD3D_SM5_OP_DADD = 0xbf, ++ VKD3D_SM5_OP_DMAX = 0xc0, ++ VKD3D_SM5_OP_DMIN = 0xc1, ++ VKD3D_SM5_OP_DMUL = 0xc2, ++ VKD3D_SM5_OP_DEQ = 0xc3, ++ VKD3D_SM5_OP_DGE = 0xc4, ++ VKD3D_SM5_OP_DLT = 0xc5, ++ VKD3D_SM5_OP_DNE = 0xc6, ++ VKD3D_SM5_OP_DMOV = 0xc7, ++ VKD3D_SM5_OP_DMOVC = 0xc8, ++ VKD3D_SM5_OP_DTOF = 0xc9, ++ VKD3D_SM5_OP_FTOD = 0xca, ++ VKD3D_SM5_OP_EVAL_SAMPLE_INDEX = 0xcc, ++ VKD3D_SM5_OP_EVAL_CENTROID = 0xcd, ++ VKD3D_SM5_OP_DCL_GS_INSTANCES = 0xce, ++ VKD3D_SM5_OP_DDIV = 0xd2, ++ VKD3D_SM5_OP_DFMA = 0xd3, ++ VKD3D_SM5_OP_DRCP = 0xd4, ++ VKD3D_SM5_OP_MSAD = 0xd5, ++ VKD3D_SM5_OP_DTOI = 0xd6, ++ VKD3D_SM5_OP_DTOU = 0xd7, ++ VKD3D_SM5_OP_ITOD = 0xd8, ++ VKD3D_SM5_OP_UTOD = 0xd9, ++ VKD3D_SM5_OP_GATHER4_S = 0xdb, ++ VKD3D_SM5_OP_GATHER4_C_S = 
0xdc, ++ VKD3D_SM5_OP_GATHER4_PO_S = 0xdd, ++ VKD3D_SM5_OP_GATHER4_PO_C_S = 0xde, ++ VKD3D_SM5_OP_LD_S = 0xdf, ++ VKD3D_SM5_OP_LD2DMS_S = 0xe0, ++ VKD3D_SM5_OP_LD_UAV_TYPED_S = 0xe1, ++ VKD3D_SM5_OP_LD_RAW_S = 0xe2, ++ VKD3D_SM5_OP_LD_STRUCTURED_S = 0xe3, ++ VKD3D_SM5_OP_SAMPLE_LOD_S = 0xe4, ++ VKD3D_SM5_OP_SAMPLE_C_LZ_S = 0xe5, ++ VKD3D_SM5_OP_SAMPLE_CL_S = 0xe6, ++ VKD3D_SM5_OP_SAMPLE_B_CL_S = 0xe7, ++ VKD3D_SM5_OP_SAMPLE_GRAD_CL_S = 0xe8, ++ VKD3D_SM5_OP_SAMPLE_C_CL_S = 0xe9, ++ VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED = 0xea, ++}; ++ ++enum vkd3d_sm4_instruction_modifier ++{ ++ VKD3D_SM4_MODIFIER_AOFFIMMI = 0x1, ++ VKD3D_SM5_MODIFIER_RESOURCE_TYPE = 0x2, ++ VKD3D_SM5_MODIFIER_DATA_TYPE = 0x3, ++}; ++ ++enum vkd3d_sm4_register_type ++{ ++ VKD3D_SM4_RT_TEMP = 0x00, ++ VKD3D_SM4_RT_INPUT = 0x01, ++ VKD3D_SM4_RT_OUTPUT = 0x02, ++ VKD3D_SM4_RT_INDEXABLE_TEMP = 0x03, ++ VKD3D_SM4_RT_IMMCONST = 0x04, ++ VKD3D_SM4_RT_IMMCONST64 = 0x05, ++ VKD3D_SM4_RT_SAMPLER = 0x06, ++ VKD3D_SM4_RT_RESOURCE = 0x07, ++ VKD3D_SM4_RT_CONSTBUFFER = 0x08, ++ VKD3D_SM4_RT_IMMCONSTBUFFER = 0x09, ++ VKD3D_SM4_RT_PRIMID = 0x0b, ++ VKD3D_SM4_RT_DEPTHOUT = 0x0c, ++ VKD3D_SM4_RT_NULL = 0x0d, ++ VKD3D_SM4_RT_RASTERIZER = 0x0e, ++ VKD3D_SM4_RT_OMASK = 0x0f, ++ VKD3D_SM5_RT_STREAM = 0x10, ++ VKD3D_SM5_RT_FUNCTION_BODY = 0x11, ++ VKD3D_SM5_RT_FUNCTION_POINTER = 0x13, ++ VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID = 0x16, ++ VKD3D_SM5_RT_FORK_INSTANCE_ID = 0x17, ++ VKD3D_SM5_RT_JOIN_INSTANCE_ID = 0x18, ++ VKD3D_SM5_RT_INPUT_CONTROL_POINT = 0x19, ++ VKD3D_SM5_RT_OUTPUT_CONTROL_POINT = 0x1a, ++ VKD3D_SM5_RT_PATCH_CONSTANT_DATA = 0x1b, ++ VKD3D_SM5_RT_DOMAIN_LOCATION = 0x1c, ++ VKD3D_SM5_RT_UAV = 0x1e, ++ VKD3D_SM5_RT_SHARED_MEMORY = 0x1f, ++ VKD3D_SM5_RT_THREAD_ID = 0x20, ++ VKD3D_SM5_RT_THREAD_GROUP_ID = 0x21, ++ VKD3D_SM5_RT_LOCAL_THREAD_ID = 0x22, ++ VKD3D_SM5_RT_COVERAGE = 0x23, ++ VKD3D_SM5_RT_LOCAL_THREAD_INDEX = 0x24, ++ VKD3D_SM5_RT_GS_INSTANCE_ID = 0x25, ++ VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, ++ VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, ++ VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, ++}; ++ ++enum vkd3d_sm4_extended_operand_type ++{ ++ VKD3D_SM4_EXTENDED_OPERAND_NONE = 0x0, ++ VKD3D_SM4_EXTENDED_OPERAND_MODIFIER = 0x1, ++}; ++ ++enum vkd3d_sm4_register_modifier ++{ ++ VKD3D_SM4_REGISTER_MODIFIER_NONE = 0x00, ++ VKD3D_SM4_REGISTER_MODIFIER_NEGATE = 0x01, ++ VKD3D_SM4_REGISTER_MODIFIER_ABS = 0x02, ++ VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE = 0x03, ++}; ++ ++enum vkd3d_sm4_register_precision ++{ ++ VKD3D_SM4_REGISTER_PRECISION_DEFAULT = 0x0, ++ VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 = 0x1, ++ VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 = 0x2, ++ VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 = 0x4, ++ VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 = 0x5, ++}; ++ ++enum vkd3d_sm4_output_primitive_type ++{ ++ VKD3D_SM4_OUTPUT_PT_POINTLIST = 0x1, ++ VKD3D_SM4_OUTPUT_PT_LINESTRIP = 0x3, ++ VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP = 0x5, ++}; ++ ++enum vkd3d_sm4_input_primitive_type ++{ ++ VKD3D_SM4_INPUT_PT_POINT = 0x01, ++ VKD3D_SM4_INPUT_PT_LINE = 0x02, ++ VKD3D_SM4_INPUT_PT_TRIANGLE = 0x03, ++ VKD3D_SM4_INPUT_PT_LINEADJ = 0x06, ++ VKD3D_SM4_INPUT_PT_TRIANGLEADJ = 0x07, ++ VKD3D_SM5_INPUT_PT_PATCH1 = 0x08, ++ VKD3D_SM5_INPUT_PT_PATCH2 = 0x09, ++ VKD3D_SM5_INPUT_PT_PATCH3 = 0x0a, ++ VKD3D_SM5_INPUT_PT_PATCH4 = 0x0b, ++ VKD3D_SM5_INPUT_PT_PATCH5 = 0x0c, ++ VKD3D_SM5_INPUT_PT_PATCH6 = 0x0d, ++ VKD3D_SM5_INPUT_PT_PATCH7 = 0x0e, ++ VKD3D_SM5_INPUT_PT_PATCH8 = 0x0f, ++ VKD3D_SM5_INPUT_PT_PATCH9 = 0x10, ++ VKD3D_SM5_INPUT_PT_PATCH10 = 0x11, ++ 
VKD3D_SM5_INPUT_PT_PATCH11 = 0x12, ++ VKD3D_SM5_INPUT_PT_PATCH12 = 0x13, ++ VKD3D_SM5_INPUT_PT_PATCH13 = 0x14, ++ VKD3D_SM5_INPUT_PT_PATCH14 = 0x15, ++ VKD3D_SM5_INPUT_PT_PATCH15 = 0x16, ++ VKD3D_SM5_INPUT_PT_PATCH16 = 0x17, ++ VKD3D_SM5_INPUT_PT_PATCH17 = 0x18, ++ VKD3D_SM5_INPUT_PT_PATCH18 = 0x19, ++ VKD3D_SM5_INPUT_PT_PATCH19 = 0x1a, ++ VKD3D_SM5_INPUT_PT_PATCH20 = 0x1b, ++ VKD3D_SM5_INPUT_PT_PATCH21 = 0x1c, ++ VKD3D_SM5_INPUT_PT_PATCH22 = 0x1d, ++ VKD3D_SM5_INPUT_PT_PATCH23 = 0x1e, ++ VKD3D_SM5_INPUT_PT_PATCH24 = 0x1f, ++ VKD3D_SM5_INPUT_PT_PATCH25 = 0x20, ++ VKD3D_SM5_INPUT_PT_PATCH26 = 0x21, ++ VKD3D_SM5_INPUT_PT_PATCH27 = 0x22, ++ VKD3D_SM5_INPUT_PT_PATCH28 = 0x23, ++ VKD3D_SM5_INPUT_PT_PATCH29 = 0x24, ++ VKD3D_SM5_INPUT_PT_PATCH30 = 0x25, ++ VKD3D_SM5_INPUT_PT_PATCH31 = 0x26, ++ VKD3D_SM5_INPUT_PT_PATCH32 = 0x27, ++}; ++ ++enum vkd3d_sm4_swizzle_type ++{ ++ VKD3D_SM4_SWIZZLE_NONE = 0x0, ++ VKD3D_SM4_SWIZZLE_VEC4 = 0x1, ++ VKD3D_SM4_SWIZZLE_SCALAR = 0x2, ++}; ++ ++enum vkd3d_sm4_dimension ++{ ++ VKD3D_SM4_DIMENSION_NONE = 0x0, ++ VKD3D_SM4_DIMENSION_SCALAR = 0x1, ++ VKD3D_SM4_DIMENSION_VEC4 = 0x2, ++}; ++ ++enum vkd3d_sm4_resource_type ++{ ++ VKD3D_SM4_RESOURCE_BUFFER = 0x1, ++ VKD3D_SM4_RESOURCE_TEXTURE_1D = 0x2, ++ VKD3D_SM4_RESOURCE_TEXTURE_2D = 0x3, ++ VKD3D_SM4_RESOURCE_TEXTURE_2DMS = 0x4, ++ VKD3D_SM4_RESOURCE_TEXTURE_3D = 0x5, ++ VKD3D_SM4_RESOURCE_TEXTURE_CUBE = 0x6, ++ VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY = 0x7, ++ VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY = 0x8, ++ VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY = 0x9, ++ VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY = 0xa, ++ VKD3D_SM4_RESOURCE_RAW_BUFFER = 0xb, ++ VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER = 0xc, ++}; ++ ++enum vkd3d_sm4_data_type ++{ ++ VKD3D_SM4_DATA_UNORM = 0x1, ++ VKD3D_SM4_DATA_SNORM = 0x2, ++ VKD3D_SM4_DATA_INT = 0x3, ++ VKD3D_SM4_DATA_UINT = 0x4, ++ VKD3D_SM4_DATA_FLOAT = 0x5, ++ VKD3D_SM4_DATA_MIXED = 0x6, ++ VKD3D_SM4_DATA_DOUBLE = 0x7, ++ VKD3D_SM4_DATA_CONTINUED = 0x8, ++ VKD3D_SM4_DATA_UNUSED = 0x9, ++}; ++ ++enum vkd3d_sm4_sampler_mode ++{ ++ VKD3D_SM4_SAMPLER_DEFAULT = 0x0, ++ VKD3D_SM4_SAMPLER_COMPARISON = 0x1, ++}; ++ ++enum vkd3d_sm4_shader_data_type ++{ ++ VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER = 0x3, ++ VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, ++}; ++ ++struct sm4_index_range ++{ ++ unsigned int index; ++ unsigned int count; ++ unsigned int mask; ++}; ++ ++struct sm4_index_range_array ++{ ++ unsigned int count; ++ struct sm4_index_range ranges[MAX_REG_OUTPUT * 2]; ++}; ++ ++struct vkd3d_shader_sm4_parser ++{ ++ const uint32_t *start, *end, *ptr; ++ ++ unsigned int output_map[MAX_REG_OUTPUT]; ++ ++ enum vkd3d_shader_opcode phase; ++ bool has_control_point_phase; ++ unsigned int input_register_masks[MAX_REG_OUTPUT]; ++ unsigned int output_register_masks[MAX_REG_OUTPUT]; ++ unsigned int patch_constant_register_masks[MAX_REG_OUTPUT]; ++ ++ struct sm4_index_range_array input_index_ranges; ++ struct sm4_index_range_array output_index_ranges; ++ struct sm4_index_range_array patch_constant_index_ranges; ++ ++ struct vkd3d_shader_parser p; ++}; ++ ++struct vkd3d_sm4_opcode_info ++{ ++ enum vkd3d_sm4_opcode opcode; ++ enum vkd3d_shader_opcode handler_idx; ++ char dst_info[SM4_MAX_DST_COUNT]; ++ char src_info[SM4_MAX_SRC_COUNT]; ++ void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, ++ const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); ++}; ++ ++static const enum vkd3d_primitive_type output_primitive_type_table[] = ++{ ++ /* UNKNOWN */ 
VKD3D_PT_UNDEFINED, ++ /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, ++ /* UNKNOWN */ VKD3D_PT_UNDEFINED, ++ /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, ++ /* UNKNOWN */ VKD3D_PT_UNDEFINED, ++ /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ VKD3D_PT_TRIANGLESTRIP, ++}; ++ ++static const enum vkd3d_primitive_type input_primitive_type_table[] = ++{ ++ /* UNKNOWN */ VKD3D_PT_UNDEFINED, ++ /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, ++ /* VKD3D_SM4_INPUT_PT_LINE */ VKD3D_PT_LINELIST, ++ /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, ++ /* UNKNOWN */ VKD3D_PT_UNDEFINED, ++ /* UNKNOWN */ VKD3D_PT_UNDEFINED, ++ /* VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, ++ /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, ++}; ++ ++static const enum vkd3d_shader_resource_type resource_type_table[] = ++{ ++ /* 0 */ VKD3D_SHADER_RESOURCE_NONE, ++ /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, ++ /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, ++ /* VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, ++ /* VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, ++ /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, ++ /* VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, ++ /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, ++ /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, ++ /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, ++ /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, ++ /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, ++ /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, ++}; ++ ++static const enum vkd3d_data_type data_type_table[] = ++{ ++ /* 0 */ VKD3D_DATA_FLOAT, ++ /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, ++ /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, ++ /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, ++ /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, ++ /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, ++ /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, ++ /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, ++ /* VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, ++ /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, ++}; ++ ++static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) ++{ ++ return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); ++} ++ ++static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) ++{ ++ const struct vkd3d_shader_version *version = &sm4->p.shader_version; ++ ++ return version->major >= 5 && version->minor >= 1; ++} ++ ++static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, ++ const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); ++static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, ++ const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param); ++ ++static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, ++ const uint32_t **ptr, const uint32_t *end, unsigned int *register_space) ++{ ++ *register_space = 0; ++ ++ if (!shader_is_sm_5_1(priv)) ++ return true; ++ ++ if (*ptr >= end) ++ { ++ WARN("Invalid ptr %p >= end %p.\n", *ptr, end); ++ return false; ++ } ++ ++ *register_space = *(*ptr)++; ++ return true; ++} ++ 
++static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, ++ (struct vkd3d_shader_src_param *)&ins->src[0]); ++ ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? ++ VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; ++} ++ ++static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, ++ const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ struct vkd3d_shader_immediate_constant_buffer *icb; ++ enum vkd3d_sm4_shader_data_type type; ++ unsigned int icb_size; ++ ++ type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; ++ if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) ++ { ++ FIXME("Ignoring shader data type %#x.\n", type); ++ ins->handler_idx = VKD3DSIH_NOP; ++ return; ++ } ++ ++ ++tokens; ++ icb_size = token_count - 1; ++ if (icb_size % 4) ++ { ++ FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); ++ ins->handler_idx = VKD3DSIH_INVALID; ++ return; ++ } ++ ++ if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) ++ { ++ ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); ++ ins->handler_idx = VKD3DSIH_INVALID; ++ return; ++ } ++ icb->vec4_count = icb_size / 4; ++ memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); ++ shader_instruction_array_add_icb(&priv->p.instructions, icb); ++ ins->declaration.icb = icb; ++} ++ ++static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, ++ const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) ++{ ++ range->first = reg->idx[1].offset; ++ range->last = reg->idx[shader_is_sm_5_1(sm4) ? 
2 : 1].offset; ++ if (range->last < range->first) ++ { ++ FIXME("Invalid register range [%u:%u].\n", range->first, range->last); ++ vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, ++ "Last register %u must not be less than first register %u in range.", range->last, range->first); ++ } ++} ++ ++static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; ++ enum vkd3d_sm4_resource_type resource_type; ++ const uint32_t *end = &tokens[token_count]; ++ enum vkd3d_sm4_data_type data_type; ++ enum vkd3d_data_type reg_data_type; ++ DWORD components; ++ unsigned int i; ++ ++ resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; ++ if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) ++ { ++ FIXME("Unhandled resource type %#x.\n", resource_type); ++ semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; ++ } ++ else ++ { ++ semantic->resource_type = resource_type_table[resource_type]; ++ } ++ ++ if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS ++ || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) ++ { ++ semantic->sample_count = (opcode_token & VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK) ++ >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; ++ } ++ ++ reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; ++ shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); ++ shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); ++ ++ components = *tokens++; ++ for (i = 0; i < VKD3D_VEC4_SIZE; i++) ++ { ++ data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); ++ ++ if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) ++ { ++ FIXME("Unhandled data type %#x.\n", data_type); ++ semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; ++ } ++ else ++ { ++ semantic->resource_data_type[i] = data_type_table[data_type]; ++ } ++ } ++ ++ if (reg_data_type == VKD3D_DATA_UAV) ++ ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; ++ ++ shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); ++} ++ ++static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ const uint32_t *end = &tokens[token_count]; ++ ++ shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); ++ shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); ++ if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) ++ ins->flags |= VKD3DSI_INDEXED_DYNAMIC; ++ ++ ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; ++ ins->declaration.cb.range.space = 0; ++ ++ if (shader_is_sm_5_1(priv)) ++ { ++ if (tokens >= end) ++ { ++ FIXME("Invalid ptr %p >= end %p.\n", tokens, end); ++ return; ++ } ++ ++ ins->declaration.cb.size = *tokens++; ++ shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); ++ } ++} ++ ++static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, ++ const uint32_t *tokens, unsigned int 
token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ const uint32_t *end = &tokens[token_count]; ++ ++ ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; ++ if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) ++ FIXME("Unhandled sampler mode %#x.\n", ins->flags); ++ shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); ++ shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); ++ shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); ++} ++ ++static bool sm4_parser_is_in_fork_or_join_phase(const struct vkd3d_shader_sm4_parser *sm4) ++{ ++ return sm4->phase == VKD3DSIH_HS_FORK_PHASE || sm4->phase == VKD3DSIH_HS_JOIN_PHASE; ++} ++ ++static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ struct vkd3d_shader_index_range *index_range = &ins->declaration.index_range; ++ unsigned int i, register_idx, register_count, write_mask; ++ enum vkd3d_shader_register_type type; ++ struct sm4_index_range_array *ranges; ++ unsigned int *io_masks; ++ ++ shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, ++ &index_range->dst); ++ index_range->register_count = *tokens; ++ ++ register_idx = index_range->dst.reg.idx[index_range->dst.reg.idx_count - 1].offset; ++ register_count = index_range->register_count; ++ write_mask = index_range->dst.write_mask; ++ ++ if (vkd3d_write_mask_component_count(write_mask) != 1) ++ { ++ WARN("Unhandled write mask %#x.\n", write_mask); ++ vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK, ++ "Index range mask %#x is not scalar.", write_mask); ++ } ++ ++ switch ((type = index_range->dst.reg.type)) ++ { ++ case VKD3DSPR_INPUT: ++ case VKD3DSPR_INCONTROLPOINT: ++ io_masks = priv->input_register_masks; ++ ranges = &priv->input_index_ranges; ++ break; ++ case VKD3DSPR_OUTPUT: ++ if (sm4_parser_is_in_fork_or_join_phase(priv)) ++ { ++ io_masks = priv->patch_constant_register_masks; ++ ranges = &priv->patch_constant_index_ranges; ++ } ++ else ++ { ++ io_masks = priv->output_register_masks; ++ ranges = &priv->output_index_ranges; ++ } ++ break; ++ case VKD3DSPR_COLOROUT: ++ case VKD3DSPR_OUTCONTROLPOINT: ++ io_masks = priv->output_register_masks; ++ ranges = &priv->output_index_ranges; ++ break; ++ case VKD3DSPR_PATCHCONST: ++ io_masks = priv->patch_constant_register_masks; ++ ranges = &priv->patch_constant_index_ranges; ++ break; ++ ++ default: ++ WARN("Unhandled register type %#x.\n", type); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, ++ "Invalid register type %#x for index range base %u, count %u, mask %#x.", ++ type, register_idx, register_count, write_mask); ++ return; ++ } ++ ++ for (i = 0; i < ranges->count; ++i) ++ { ++ struct sm4_index_range r = ranges->ranges[i]; ++ ++ if (!(r.mask & write_mask)) ++ continue; ++ /* Ranges with the same base but different lengths are not an issue. 
*/ ++ if (register_idx == r.index) ++ continue; ++ ++ if ((r.index <= register_idx && register_idx - r.index < r.count) ++ || (register_idx < r.index && r.index - register_idx < register_count)) ++ { ++ WARN("Detected index range collision for base %u, count %u, mask %#x.\n", ++ register_idx, register_count, write_mask); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, ++ "Register index range base %u, count %u, mask %#x collides with a previous declaration.", ++ register_idx, register_count, write_mask); ++ return; ++ } ++ } ++ ranges->ranges[ranges->count].index = register_idx; ++ ranges->ranges[ranges->count].count = register_count; ++ ranges->ranges[ranges->count++].mask = write_mask; ++ ++ for (i = 0; i < register_count; ++i) ++ { ++ if ((io_masks[register_idx + i] & write_mask) != write_mask) ++ { ++ WARN("No matching declaration for index range base %u, count %u, mask %#x.\n", ++ register_idx, register_count, write_mask); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, ++ "Input/output registers matching index range base %u, count %u, mask %#x were not declared.", ++ register_idx, register_count, write_mask); ++ return; ++ } ++ } ++} ++ ++static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ enum vkd3d_sm4_output_primitive_type primitive_type; ++ ++ primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; ++ if (primitive_type >= ARRAY_SIZE(output_primitive_type_table)) ++ ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; ++ else ++ ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type]; ++ ++ if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) ++ FIXME("Unhandled output primitive type %#x.\n", primitive_type); ++} ++ ++static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ enum vkd3d_sm4_input_primitive_type primitive_type; ++ ++ primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; ++ if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) ++ { ++ ins->declaration.primitive_type.type = VKD3D_PT_PATCH; ++ ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; ++ } ++ else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) ++ { ++ ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; ++ } ++ else ++ { ++ ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type]; ++ } ++ ++ if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) ++ FIXME("Unhandled input primitive type %#x.\n", primitive_type); ++} ++ ++static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->declaration.count = *tokens; ++} ++ ++static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ shader_sm4_read_dst_param(priv, 
&tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); ++} ++ ++static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, ++ &ins->declaration.register_semantic.reg); ++ ins->declaration.register_semantic.sysval_semantic = *tokens; ++} ++ ++static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; ++ shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); ++} ++ ++static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; ++ shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, ++ &ins->declaration.register_semantic.reg); ++ ins->declaration.register_semantic.sysval_semantic = *tokens; ++} ++ ++static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->declaration.indexable_temp.register_idx = *tokens++; ++ ins->declaration.indexable_temp.register_size = *tokens++; ++ ins->declaration.indexable_temp.component_count = *tokens; ++} ++ ++static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; ++} ++ ++static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, ++ const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ struct vkd3d_shader_src_param *src_params = (struct vkd3d_shader_src_param *)ins->src; ++ src_params[0].reg.u.fp_body_idx = *tokens++; ++ shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, &src_params[0]); ++} ++ ++static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->declaration.index = *tokens; ++} ++ ++static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->declaration.index = *tokens++; ++ FIXME("Ignoring set of function bodies (count %u).\n", *tokens); ++} ++ ++static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->declaration.fp.index = *tokens++; ++ 
ins->declaration.fp.body_count = *tokens++; ++ ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT; ++ ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK; ++ FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count); ++} ++ ++static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) ++ >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; ++} ++ ++static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) ++ >> VKD3D_SM5_TESSELLATOR_SHIFT; ++} ++ ++static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) ++ >> VKD3D_SM5_TESSELLATOR_SHIFT; ++} ++ ++static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) ++ >> VKD3D_SM5_TESSELLATOR_SHIFT; ++} ++ ++static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->declaration.max_tessellation_factor = *(float *)tokens; ++} ++ ++static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->declaration.thread_group_size.x = *tokens++; ++ ins->declaration.thread_group_size.y = *tokens++; ++ ins->declaration.thread_group_size.z = *tokens++; ++} ++ ++static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, ++ const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; ++ const uint32_t *end = &tokens[token_count]; ++ ++ shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); ++ shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); ++ ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; ++ shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); ++} ++ ++static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; ++ const uint32_t *end = &tokens[token_count]; ++ ++ shader_sm4_read_dst_param(priv, 
&tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); ++ shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); ++ ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; ++ resource->byte_stride = *tokens++; ++ if (resource->byte_stride % 4) ++ FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); ++ shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); ++} ++ ++static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg); ++ ins->declaration.tgsm_raw.byte_count = *tokens; ++ if (ins->declaration.tgsm_raw.byte_count % 4) ++ FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); ++} ++ ++static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, ++ &ins->declaration.tgsm_structured.reg); ++ ins->declaration.tgsm_structured.byte_stride = *tokens++; ++ ins->declaration.tgsm_structured.structure_count = *tokens; ++ if (ins->declaration.tgsm_structured.byte_stride % 4) ++ FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); ++} ++ ++static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; ++ const uint32_t *end = &tokens[token_count]; ++ ++ shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); ++ shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); ++ resource->byte_stride = *tokens++; ++ if (resource->byte_stride % 4) ++ FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); ++ shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); ++} ++ ++static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; ++ const uint32_t *end = &tokens[token_count]; ++ ++ shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); ++ shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); ++ shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); ++} ++ ++static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, ++ const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++{ ++ ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT; ++} ++ ++/* ++ * d -> VKD3D_DATA_DOUBLE ++ * f -> VKD3D_DATA_FLOAT ++ * i -> VKD3D_DATA_INT ++ * u -> VKD3D_DATA_UINT ++ * O -> 
VKD3D_DATA_OPAQUE ++ * R -> VKD3D_DATA_RESOURCE ++ * S -> VKD3D_DATA_SAMPLER ++ * U -> VKD3D_DATA_UAV ++ */ ++static const struct vkd3d_sm4_opcode_info opcode_table[] = ++{ ++ {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"}, ++ {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, ++ {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, ++ {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", ++ shader_sm4_read_conditional_op}, ++ {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, ++ {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, ++ {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", ++ shader_sm4_read_conditional_op}, ++ {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, ++ {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, ++ {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, ++ {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, ++ {VKD3D_SM4_OP_DISCARD, VKD3DSIH_DISCARD, "", "u", ++ shader_sm4_read_conditional_op}, ++ {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, ++ {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, ++ {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, ++ {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"}, ++ {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""}, ++ {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""}, ++ {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""}, ++ {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""}, ++ {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""}, ++ {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"}, ++ {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"}, ++ {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"}, ++ {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"}, ++ {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"}, ++ {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", "ff"}, ++ {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, ++ {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", ++ shader_sm4_read_conditional_op}, ++ {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, ++ {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, ++ {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, ++ {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"}, ++ {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"}, ++ {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN, "i", "ii"}, ++ {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, ++ {VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, ++ {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, ++ {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, ++ {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, ++ {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, ++ {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, ++ {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, ++ {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, ++ {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, ++ {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, ++ {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, ++ {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, ++ {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, ++ {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, ++ {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", ++ shader_sm4_read_shader_data}, ++ {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, ++ {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, ++ {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, ++ {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, ++ {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, ++ {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, ++ {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, ++ {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, ++ {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, ++ {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", ++ shader_sm4_read_conditional_op}, ++ {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, ++ {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, ++ {VKD3D_SM4_OP_ROUND_PI, 
VKD3DSIH_ROUND_PI, "f", "f"}, ++ {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, ++ {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, ++ {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, ++ {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, ++ {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, ++ {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, ++ {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, ++ {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, ++ {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, ++ {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, ++ {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, ++ {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, ++ {VKD3D_SM4_OP_ULT, VKD3DSIH_ULT, "u", "uu"}, ++ {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, ++ {VKD3D_SM4_OP_UMUL, VKD3DSIH_UMUL, "uu", "uu"}, ++ {VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, ++ {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, ++ {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, ++ {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, ++ {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, ++ {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "", "", ++ shader_sm4_read_dcl_resource}, ++ {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", ++ shader_sm4_read_dcl_constant_buffer}, ++ {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", ++ shader_sm4_read_dcl_sampler}, ++ {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", ++ shader_sm4_read_dcl_index_range}, ++ {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", ++ shader_sm4_read_dcl_output_topology}, ++ {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", ++ shader_sm4_read_dcl_input_primitive}, ++ {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", ++ shader_sm4_read_declaration_count}, ++ {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", ++ shader_sm4_read_declaration_dst}, ++ {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", ++ shader_sm4_read_declaration_register_semantic}, ++ {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", ++ shader_sm4_read_declaration_register_semantic}, ++ {VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", ++ shader_sm4_read_dcl_input_ps}, ++ {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", ++ shader_sm4_read_declaration_register_semantic}, ++ {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, VKD3DSIH_DCL_INPUT_PS_SIV, "", "", ++ shader_sm4_read_dcl_input_ps_siv}, ++ {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", ++ shader_sm4_read_declaration_dst}, ++ {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", ++ shader_sm4_read_declaration_register_semantic}, ++ {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", ++ shader_sm4_read_declaration_count}, ++ {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", ++ shader_sm4_read_dcl_indexable_temp}, ++ {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", ++ shader_sm4_read_dcl_global_flags}, ++ {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, ++ {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, ++ {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, ++ {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, ++ {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, ++ {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, ++ {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, ++ {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, ++ {VKD3D_SM5_OP_EMIT_STREAM, 
VKD3DSIH_EMIT_STREAM, "", "f"}, ++ {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, ++ {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", ++ shader_sm5_read_fcall}, ++ {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, ++ {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, ++ {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, ++ {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, ++ {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, ++ {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, ++ {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, ++ {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, ++ {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, ++ {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, ++ {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, ++ {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, ++ {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, ++ {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, ++ {VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, ++ {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, ++ {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, ++ {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, ++ {VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, ++ {VKD3D_SM5_OP_SWAPC, VKD3DSIH_SWAPC, "ff", "uff"}, ++ {VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, ++ {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", ++ shader_sm5_read_dcl_function_body}, ++ {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", ++ shader_sm5_read_dcl_function_table}, ++ {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", ++ shader_sm5_read_dcl_interface}, ++ {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", ++ shader_sm5_read_control_point_count}, ++ {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", ++ shader_sm5_read_control_point_count}, ++ {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", ++ shader_sm5_read_dcl_tessellator_domain}, ++ {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", ++ shader_sm5_read_dcl_tessellator_partitioning}, ++ {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", ++ shader_sm5_read_dcl_tessellator_output_primitive}, ++ {VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", ++ shader_sm5_read_dcl_hs_max_tessfactor}, ++ {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", ++ shader_sm4_read_declaration_count}, ++ {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", ++ shader_sm4_read_declaration_count}, ++ {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", ++ shader_sm5_read_dcl_thread_group}, ++ {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", ++ shader_sm4_read_dcl_resource}, ++ {VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", ++ shader_sm5_read_dcl_uav_raw}, ++ {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", ++ shader_sm5_read_dcl_uav_structured}, ++ {VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", ++ shader_sm5_read_dcl_tgsm_raw}, ++ {VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", ++ shader_sm5_read_dcl_tgsm_structured}, ++ {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", ++ 
shader_sm5_read_dcl_resource_raw}, ++ {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", ++ shader_sm5_read_dcl_resource_structured}, ++ {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, ++ {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, ++ {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, ++ {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, ++ {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, ++ {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, ++ {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, ++ {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, ++ {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, ++ {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, ++ {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, ++ {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, ++ {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, ++ {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, ++ {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, ++ {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, ++ {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, ++ {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, ++ {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, ++ {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, ++ {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, ++ {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, ++ {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, ++ {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, ++ {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, ++ {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, ++ {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, ++ {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", ++ shader_sm5_read_sync}, ++ {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, ++ {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, ++ {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, ++ {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, ++ {VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, ++ {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, ++ {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, ++ {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, ++ {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, ++ {VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, ++ {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, ++ {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, ++ {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, ++ {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, ++ {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3DSIH_DCL_GS_INSTANCES, "", "", ++ shader_sm4_read_declaration_count}, ++ {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, ++ {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, ++ {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, ++ {VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"}, ++ {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, ++ {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, ++ {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, ++ {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, ++ {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, ++ {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, ++ {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, ++ 
{VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, ++ {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, ++ {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, ++ {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, ++ {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, ++ {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, ++ {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, ++ {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, ++ {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, ++ {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, ++ {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, ++ {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, ++ {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, ++}; ++ ++static const enum vkd3d_shader_register_type register_type_table[] = ++{ ++ /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, ++ /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, ++ /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, ++ /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, ++ /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, ++ /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, ++ /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, ++ /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, ++ /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, ++ /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, ++ /* UNKNOWN */ ~0u, ++ /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, ++ /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, ++ /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, ++ /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, ++ /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, ++ /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, ++ /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, ++ /* UNKNOWN */ ~0u, ++ /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, ++ /* UNKNOWN */ ~0u, ++ /* UNKNOWN */ ~0u, ++ /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, ++ /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, ++ /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, ++ /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, ++ /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, ++ /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, ++ /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, ++ /* UNKNOWN */ ~0u, ++ /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, ++ /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, ++ /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, ++ /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, ++ /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, ++ /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, ++ /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, ++ /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, ++ /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, ++ /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, ++ /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, ++ /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, ++}; ++ ++static const enum vkd3d_shader_register_precision register_precision_table[] = ++{ ++ /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, ++ /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, ++ /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, ++ /* UNKNOWN */ 
VKD3D_SHADER_REGISTER_PRECISION_INVALID, ++ /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, ++ /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, ++}; ++ ++static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) ++ { ++ if (opcode == opcode_table[i].opcode) return &opcode_table[i]; ++ } ++ ++ return NULL; ++} ++ ++static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) ++{ ++ switch (sm4->p.shader_version.type) ++ { ++ case VKD3D_SHADER_TYPE_PIXEL: ++ if (reg->type == VKD3DSPR_OUTPUT) ++ { ++ unsigned int reg_idx = reg->idx[0].offset; ++ ++ if (reg_idx >= ARRAY_SIZE(sm4->output_map)) ++ { ++ /* Validated later */ ++ break; ++ } ++ ++ reg->type = VKD3DSPR_COLOROUT; ++ reg->idx[0].offset = sm4->output_map[reg_idx]; ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++static enum vkd3d_data_type map_data_type(char t) ++{ ++ switch (t) ++ { ++ case 'd': ++ return VKD3D_DATA_DOUBLE; ++ case 'f': ++ return VKD3D_DATA_FLOAT; ++ case 'i': ++ return VKD3D_DATA_INT; ++ case 'u': ++ return VKD3D_DATA_UINT; ++ case 'O': ++ return VKD3D_DATA_OPAQUE; ++ case 'R': ++ return VKD3D_DATA_RESOURCE; ++ case 'S': ++ return VKD3D_DATA_SAMPLER; ++ case 'U': ++ return VKD3D_DATA_UAV; ++ default: ++ ERR("Invalid data type '%c'.\n", t); ++ return VKD3D_DATA_FLOAT; ++ } ++} ++ ++static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) ++{ ++ struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); ++ ++ shader_instruction_array_destroy(&parser->instructions); ++ free_shader_desc(&parser->shader_desc); ++ vkd3d_free(sm4); ++} ++ ++static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, ++ const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) ++{ ++ if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) ++ { ++ struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&priv->p, 1); ++ ++ if (!(reg_idx->rel_addr = rel_addr)) ++ { ++ ERR("Failed to get src param for relative addressing.\n"); ++ return false; ++ } ++ ++ if (addressing & VKD3D_SM4_ADDRESSING_OFFSET) ++ reg_idx->offset = *(*ptr)++; ++ else ++ reg_idx->offset = 0; ++ shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr); ++ } ++ else ++ { ++ reg_idx->rel_addr = NULL; ++ reg_idx->offset = *(*ptr)++; ++ } ++ ++ return true; ++} ++ ++static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_type) ++{ ++ switch (register_type) ++ { ++ case VKD3D_SM4_RT_SAMPLER: ++ case VKD3D_SM4_RT_RESOURCE: ++ case VKD3D_SM4_RT_CONSTBUFFER: ++ case VKD3D_SM5_RT_UAV: ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, ++ enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) ++{ ++ enum vkd3d_sm4_register_precision precision; ++ enum vkd3d_sm4_register_type register_type; ++ enum vkd3d_sm4_extended_operand_type type; ++ enum vkd3d_sm4_register_modifier m; ++ uint32_t token, order, extended; ++ ++ if (*ptr >= end) ++ { ++ WARN("Invalid ptr %p >= end %p.\n", *ptr, end); ++ return false; ++ } ++ token = *(*ptr)++; ++ ++ register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; ++ if 
(register_type >= ARRAY_SIZE(register_type_table) ++ || register_type_table[register_type] == VKD3DSPR_INVALID) ++ { ++ FIXME("Unhandled register type %#x.\n", register_type); ++ param->type = VKD3DSPR_TEMP; ++ } ++ else ++ { ++ param->type = register_type_table[register_type]; ++ } ++ param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; ++ param->non_uniform = false; ++ param->data_type = data_type; ++ ++ *modifier = VKD3DSPSM_NONE; ++ if (token & VKD3D_SM4_EXTENDED_OPERAND) ++ { ++ if (*ptr >= end) ++ { ++ WARN("Invalid ptr %p >= end %p.\n", *ptr, end); ++ return false; ++ } ++ extended = *(*ptr)++; ++ ++ if (extended & VKD3D_SM4_EXTENDED_OPERAND) ++ { ++ FIXME("Skipping second-order extended operand.\n"); ++ *ptr += *ptr < end; ++ } ++ ++ type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK; ++ if (type == VKD3D_SM4_EXTENDED_OPERAND_MODIFIER) ++ { ++ m = (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT; ++ switch (m) ++ { ++ case VKD3D_SM4_REGISTER_MODIFIER_NEGATE: ++ *modifier = VKD3DSPSM_NEG; ++ break; ++ ++ case VKD3D_SM4_REGISTER_MODIFIER_ABS: ++ *modifier = VKD3DSPSM_ABS; ++ break; ++ ++ case VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE: ++ *modifier = VKD3DSPSM_ABSNEG; ++ break; ++ ++ default: ++ FIXME("Unhandled register modifier %#x.\n", m); ++ /* fall-through */ ++ case VKD3D_SM4_REGISTER_MODIFIER_NONE: ++ break; ++ } ++ ++ precision = (extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT; ++ if (precision >= ARRAY_SIZE(register_precision_table) ++ || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID) ++ { ++ FIXME("Unhandled register precision %#x.\n", precision); ++ param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID; ++ } ++ else ++ { ++ param->precision = register_precision_table[precision]; ++ } ++ ++ if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK) ++ param->non_uniform = true; ++ ++ extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK ++ | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK ++ | VKD3D_SM4_EXTENDED_OPERAND); ++ if (extended) ++ FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended); ++ } ++ else if (type) ++ { ++ FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type); ++ } ++ } ++ ++ order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT; ++ ++ if (order < 1) ++ { ++ param->idx[0].offset = ~0u; ++ param->idx[0].rel_addr = NULL; ++ } ++ else ++ { ++ DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> VKD3D_SM4_ADDRESSING_SHIFT0; ++ if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[0]))) ++ { ++ ERR("Failed to read register index.\n"); ++ return false; ++ } ++ } ++ ++ if (order < 2) ++ { ++ param->idx[1].offset = ~0u; ++ param->idx[1].rel_addr = NULL; ++ } ++ else ++ { ++ DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1; ++ if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[1]))) ++ { ++ ERR("Failed to read register index.\n"); ++ return false; ++ } ++ } ++ ++ if (order < 3) ++ { ++ param->idx[2].offset = ~0u; ++ param->idx[2].rel_addr = NULL; ++ } ++ else ++ { ++ DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2; ++ if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[2]))) ++ { ++ ERR("Failed to read register index.\n"); ++ return false; ++ } ++ } ++ ++ if (order > 3) ++ { ++ WARN("Unhandled order
%u.\n", order); ++ return false; ++ } ++ ++ param->idx_count = order; ++ ++ if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64) ++ { ++ enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT; ++ unsigned int dword_count; ++ ++ switch (dimension) ++ { ++ case VKD3D_SM4_DIMENSION_SCALAR: ++ param->immconst_type = VKD3D_IMMCONST_SCALAR; ++ dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64); ++ if (end - *ptr < dword_count) ++ { ++ WARN("Invalid ptr %p, end %p.\n", *ptr, end); ++ return false; ++ } ++ memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD)); ++ *ptr += dword_count; ++ break; ++ ++ case VKD3D_SM4_DIMENSION_VEC4: ++ param->immconst_type = VKD3D_IMMCONST_VEC4; ++ if (end - *ptr < VKD3D_VEC4_SIZE) ++ { ++ WARN("Invalid ptr %p, end %p.\n", *ptr, end); ++ return false; ++ } ++ memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD)); ++ *ptr += 4; ++ break; ++ ++ default: ++ FIXME("Unhandled dimension %#x.\n", dimension); ++ break; ++ } ++ } ++ else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type)) ++ { ++ /* SM5.1 places a symbol identifier in idx[0] and moves ++ * other values up one slot. Normalize to SM5.1. */ ++ param->idx[2] = param->idx[1]; ++ param->idx[1] = param->idx[0]; ++ ++param->idx_count; ++ } ++ ++ map_register(priv, param); ++ ++ return true; ++} ++ ++static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) ++{ ++ switch (reg->type) ++ { ++ case VKD3DSPR_COVERAGE: ++ case VKD3DSPR_DEPTHOUT: ++ case VKD3DSPR_DEPTHOUTGE: ++ case VKD3DSPR_DEPTHOUTLE: ++ case VKD3DSPR_GSINSTID: ++ case VKD3DSPR_LOCALTHREADINDEX: ++ case VKD3DSPR_OUTPOINTID: ++ case VKD3DSPR_PRIMID: ++ case VKD3DSPR_SAMPLEMASK: ++ case VKD3DSPR_OUTSTENCILREF: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static uint32_t swizzle_from_sm4(uint32_t s) ++{ ++ return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3); ++} ++ ++static bool register_is_input_output(const struct vkd3d_shader_register *reg) ++{ ++ switch (reg->type) ++ { ++ case VKD3DSPR_INPUT: ++ case VKD3DSPR_OUTPUT: ++ case VKD3DSPR_COLOROUT: ++ case VKD3DSPR_INCONTROLPOINT: ++ case VKD3DSPR_OUTCONTROLPOINT: ++ case VKD3DSPR_PATCHCONST: ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++static bool register_is_control_point_input(const struct vkd3d_shader_register *reg, ++ const struct vkd3d_shader_sm4_parser *priv) ++{ ++ return reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT ++ || (reg->type == VKD3DSPR_INPUT && (priv->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE ++ || priv->p.shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY)); ++} ++ ++static unsigned int mask_from_swizzle(unsigned int swizzle) ++{ ++ return (1u << vkd3d_swizzle_get_component(swizzle, 0)) ++ | (1u << vkd3d_swizzle_get_component(swizzle, 1)) ++ | (1u << vkd3d_swizzle_get_component(swizzle, 2)) ++ | (1u << vkd3d_swizzle_get_component(swizzle, 3)); ++} ++ ++static bool shader_sm4_validate_input_output_register(struct vkd3d_shader_sm4_parser *priv, ++ const struct vkd3d_shader_register *reg, unsigned int mask) ++{ ++ unsigned int idx_count = 1 + register_is_control_point_input(reg, priv); ++ const unsigned int *masks; ++ unsigned int register_idx; ++ ++ if (reg->idx_count != idx_count) ++ { ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_INDEX_COUNT, ++ "Invalid index count %u for register type %#x; 
expected count %u.", ++ reg->idx_count, reg->type, idx_count); ++ return false; ++ } ++ ++ switch (reg->type) ++ { ++ case VKD3DSPR_INPUT: ++ case VKD3DSPR_INCONTROLPOINT: ++ masks = priv->input_register_masks; ++ break; ++ case VKD3DSPR_OUTPUT: ++ masks = sm4_parser_is_in_fork_or_join_phase(priv) ? priv->patch_constant_register_masks ++ : priv->output_register_masks; ++ break; ++ case VKD3DSPR_COLOROUT: ++ case VKD3DSPR_OUTCONTROLPOINT: ++ masks = priv->output_register_masks; ++ break; ++ case VKD3DSPR_PATCHCONST: ++ masks = priv->patch_constant_register_masks; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ register_idx = reg->idx[reg->idx_count - 1].offset; ++ /* The signature element registers have already been checked against MAX_REG_OUTPUT. */ ++ if (register_idx >= MAX_REG_OUTPUT || (masks[register_idx] & mask) != mask) ++ { ++ WARN("Failed to find signature element for register type %#x, index %u and mask %#x.\n", ++ reg->type, register_idx, mask); ++ vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER, ++ "Could not find signature element matching register type %#x, index %u and mask %#x.", ++ reg->type, register_idx, mask); ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, ++ const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) ++{ ++ DWORD token; ++ ++ if (*ptr >= end) ++ { ++ WARN("Invalid ptr %p >= end %p.\n", *ptr, end); ++ return false; ++ } ++ token = **ptr; ++ ++ if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers)) ++ { ++ ERR("Failed to read parameter.\n"); ++ return false; ++ } ++ ++ if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) ++ { ++ src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; ++ } ++ else ++ { ++ enum vkd3d_sm4_swizzle_type swizzle_type = ++ (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; ++ ++ switch (swizzle_type) ++ { ++ case VKD3D_SM4_SWIZZLE_NONE: ++ if (shader_sm4_is_scalar_register(&src_param->reg)) ++ src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ else ++ src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; ++ break; ++ ++ case VKD3D_SM4_SWIZZLE_SCALAR: ++ src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; ++ src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; ++ break; ++ ++ case VKD3D_SM4_SWIZZLE_VEC4: ++ src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); ++ break; ++ ++ default: ++ FIXME("Unhandled swizzle type %#x.\n", swizzle_type); ++ break; ++ } ++ } ++ ++ if (register_is_input_output(&src_param->reg) && !shader_sm4_validate_input_output_register(priv, ++ &src_param->reg, mask_from_swizzle(src_param->swizzle))) ++ return false; ++ ++ return true; ++} ++ ++static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, ++ const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) ++{ ++ enum vkd3d_shader_src_modifier modifier; ++ DWORD token; ++ ++ if (*ptr >= end) ++ { ++ WARN("Invalid ptr %p >= end %p.\n", *ptr, end); ++ return false; ++ } ++ token = **ptr; ++ ++ if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier)) ++ { ++ ERR("Failed to read parameter.\n"); ++ return false; ++ } ++ ++ if (modifier != VKD3DSPSM_NONE) ++ { ++ ERR("Invalid source modifier %#x on destination 
register.\n", modifier); ++ return false; ++ } ++ ++ dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; ++ if (data_type == VKD3D_DATA_DOUBLE) ++ dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); ++ /* Scalar registers are declared with no write mask in shader bytecode. */ ++ if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) ++ dst_param->write_mask = VKD3DSP_WRITEMASK_0; ++ dst_param->modifiers = 0; ++ dst_param->shift = 0; ++ ++ if (register_is_input_output(&dst_param->reg) && !shader_sm4_validate_input_output_register(priv, ++ &dst_param->reg, dst_param->write_mask)) ++ return false; ++ ++ return true; ++} ++ ++static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins) ++{ ++ enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK; ++ ++ switch (modifier_type) ++ { ++ case VKD3D_SM4_MODIFIER_AOFFIMMI: ++ { ++ static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER ++ | VKD3D_SM4_MODIFIER_MASK ++ | VKD3D_SM4_AOFFIMMI_U_MASK ++ | VKD3D_SM4_AOFFIMMI_V_MASK ++ | VKD3D_SM4_AOFFIMMI_W_MASK; ++ ++ /* Bit fields are used for sign extension. */ ++ struct ++ { ++ int u : 4; ++ int v : 4; ++ int w : 4; ++ } aoffimmi; ++ ++ if (modifier & ~recognized_bits) ++ FIXME("Unhandled instruction modifier %#x.\n", modifier); ++ ++ aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT; ++ aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT; ++ aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT; ++ ins->texel_offset.u = aoffimmi.u; ++ ins->texel_offset.v = aoffimmi.v; ++ ins->texel_offset.w = aoffimmi.w; ++ break; ++ } ++ ++ case VKD3D_SM5_MODIFIER_DATA_TYPE: ++ { ++ DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT; ++ unsigned int i; ++ ++ for (i = 0; i < VKD3D_VEC4_SIZE; i++) ++ { ++ enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); ++ ++ if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) ++ { ++ FIXME("Unhandled data type %#x.\n", data_type); ++ ins->resource_data_type[i] = VKD3D_DATA_FLOAT; ++ } ++ else ++ { ++ ins->resource_data_type[i] = data_type_table[data_type]; ++ } ++ } ++ break; ++ } ++ ++ case VKD3D_SM5_MODIFIER_RESOURCE_TYPE: ++ { ++ enum vkd3d_sm4_resource_type resource_type ++ = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT; ++ ++ if (resource_type == VKD3D_SM4_RESOURCE_RAW_BUFFER) ++ ins->raw = true; ++ else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER) ++ ins->structured = true; ++ ++ if (resource_type < ARRAY_SIZE(resource_type_table)) ++ ins->resource_type = resource_type_table[resource_type]; ++ else ++ { ++ FIXME("Unhandled resource type %#x.\n", resource_type); ++ ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; ++ } ++ ++ ins->resource_stride ++ = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT; ++ break; ++ } ++ ++ default: ++ FIXME("Unhandled instruction modifier %#x.\n", modifier); ++ } ++} ++ ++static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_instruction *ins) ++{ ++ const struct vkd3d_sm4_opcode_info *opcode_info; ++ uint32_t opcode_token, opcode, previous_token; ++ struct vkd3d_shader_dst_param *dst_params; ++ struct vkd3d_shader_src_param *src_params; ++ const uint32_t 
**ptr = &sm4->ptr; ++ unsigned int i, len; ++ size_t remaining; ++ const uint32_t *p; ++ DWORD precise; ++ ++ if (*ptr >= sm4->end) ++ { ++ WARN("End of byte-code, failed to read opcode.\n"); ++ goto fail; ++ } ++ remaining = sm4->end - *ptr; ++ ++ ++sm4->p.location.line; ++ ++ opcode_token = *(*ptr)++; ++ opcode = opcode_token & VKD3D_SM4_OPCODE_MASK; ++ ++ len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); ++ if (!len) ++ { ++ if (remaining < 2) ++ { ++ WARN("End of byte-code, failed to read length token.\n"); ++ goto fail; ++ } ++ len = **ptr; ++ } ++ if (!len || remaining < len) ++ { ++ WARN("Read invalid length %u (remaining %zu).\n", len, remaining); ++ goto fail; ++ } ++ --len; ++ ++ if (!(opcode_info = get_opcode_info(opcode))) ++ { ++ FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); ++ ins->handler_idx = VKD3DSIH_INVALID; ++ *ptr += len; ++ return; ++ } ++ ++ ins->handler_idx = opcode_info->handler_idx; ++ if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE ++ || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) ++ sm4->phase = ins->handler_idx; ++ sm4->has_control_point_phase |= ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE; ++ ins->flags = 0; ++ ins->coissue = false; ++ ins->raw = false; ++ ins->structured = false; ++ ins->predicate = NULL; ++ ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); ++ ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); ++ ins->src = src_params = shader_parser_get_src_params(&sm4->p, ins->src_count); ++ if (!src_params && ins->src_count) ++ { ++ ERR("Failed to allocate src parameters.\n"); ++ vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); ++ ins->handler_idx = VKD3DSIH_INVALID; ++ return; ++ } ++ ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; ++ ins->resource_stride = 0; ++ ins->resource_data_type[0] = VKD3D_DATA_FLOAT; ++ ins->resource_data_type[1] = VKD3D_DATA_FLOAT; ++ ins->resource_data_type[2] = VKD3D_DATA_FLOAT; ++ ins->resource_data_type[3] = VKD3D_DATA_FLOAT; ++ memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); ++ ++ p = *ptr; ++ *ptr += len; ++ ++ if (opcode_info->read_opcode_func) ++ { ++ ins->dst = NULL; ++ ins->dst_count = 0; ++ opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4); ++ } ++ else ++ { ++ enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE; ++ ++ previous_token = opcode_token; ++ while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr) ++ shader_sm4_read_instruction_modifier(previous_token = *p++, ins); ++ ++ ins->flags = (opcode_token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; ++ if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) ++ { ++ ins->flags &= ~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE; ++ instruction_dst_modifier = VKD3DSPDM_SATURATE; ++ } ++ precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT; ++ ins->flags |= precise << VKD3DSI_PRECISE_SHIFT; ++ ++ ins->dst = dst_params = shader_parser_get_dst_params(&sm4->p, ins->dst_count); ++ if (!dst_params && ins->dst_count) ++ { ++ ERR("Failed to allocate dst parameters.\n"); ++ vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); ++ ins->handler_idx = VKD3DSIH_INVALID; ++ return; ++ } ++ for (i = 0; i < ins->dst_count; ++i) ++ { ++ if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), ++ 
&dst_params[i]))) ++ { ++ ins->handler_idx = VKD3DSIH_INVALID; ++ return; ++ } ++ dst_params[i].modifiers |= instruction_dst_modifier; ++ } ++ ++ for (i = 0; i < ins->src_count; ++i) ++ { ++ if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), ++ &src_params[i]))) ++ { ++ ins->handler_idx = VKD3DSIH_INVALID; ++ return; ++ } ++ } ++ } ++ ++ return; ++ ++fail: ++ *ptr = sm4->end; ++ ins->handler_idx = VKD3DSIH_INVALID; ++ return; ++} ++ ++static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = ++{ ++ .parser_destroy = shader_sm4_destroy, ++}; ++ ++static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, ++ size_t byte_code_size, const char *source_name, const struct shader_signature *output_signature, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ struct vkd3d_shader_version version; ++ uint32_t version_token, token_count; ++ unsigned int i; ++ ++ if (byte_code_size / sizeof(*byte_code) < 2) ++ { ++ WARN("Invalid byte code size %lu.\n", (long)byte_code_size); ++ return false; ++ } ++ ++ version_token = byte_code[0]; ++ TRACE("Version: 0x%08x.\n", version_token); ++ token_count = byte_code[1]; ++ TRACE("Token count: %u.\n", token_count); ++ ++ if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count) ++ { ++ WARN("Invalid token count %u.\n", token_count); ++ return false; ++ } ++ ++ sm4->start = &byte_code[2]; ++ sm4->end = &byte_code[token_count]; ++ ++ switch (version_token >> 16) ++ { ++ case VKD3D_SM4_PS: ++ version.type = VKD3D_SHADER_TYPE_PIXEL; ++ break; ++ ++ case VKD3D_SM4_VS: ++ version.type = VKD3D_SHADER_TYPE_VERTEX; ++ break; ++ ++ case VKD3D_SM4_GS: ++ version.type = VKD3D_SHADER_TYPE_GEOMETRY; ++ break; ++ ++ case VKD3D_SM5_HS: ++ version.type = VKD3D_SHADER_TYPE_HULL; ++ break; ++ ++ case VKD3D_SM5_DS: ++ version.type = VKD3D_SHADER_TYPE_DOMAIN; ++ break; ++ ++ case VKD3D_SM5_CS: ++ version.type = VKD3D_SHADER_TYPE_COMPUTE; ++ break; ++ ++ default: ++ FIXME("Unrecognised shader type %#x.\n", version_token >> 16); ++ } ++ version.major = VKD3D_SM4_VERSION_MAJOR(version_token); ++ version.minor = VKD3D_SM4_VERSION_MINOR(version_token); ++ ++ /* Estimate instruction count to avoid reallocation in most shaders. 
*/ ++ if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, ++ token_count / 7u + 20)) ++ return false; ++ sm4->ptr = sm4->start; ++ ++ memset(sm4->output_map, 0xff, sizeof(sm4->output_map)); ++ for (i = 0; i < output_signature->element_count; ++i) ++ { ++ struct signature_element *e = &output_signature->elements[i]; ++ ++ if (version.type == VKD3D_SHADER_TYPE_PIXEL ++ && ascii_strcasecmp(e->semantic_name, "SV_Target")) ++ continue; ++ if (e->register_index >= ARRAY_SIZE(sm4->output_map)) ++ { ++ WARN("Invalid output index %u.\n", e->register_index); ++ continue; ++ } ++ ++ sm4->output_map[e->register_index] = e->semantic_index; ++ } ++ ++ return true; ++} ++ ++static bool shader_sm4_parser_validate_signature(struct vkd3d_shader_sm4_parser *sm4, ++ const struct shader_signature *signature, unsigned int *masks, const char *name) ++{ ++ unsigned int i, register_idx, register_count, mask; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ register_idx = signature->elements[i].register_index; ++ register_count = signature->elements[i].register_count; ++ if (register_idx != ~0u && (register_idx >= MAX_REG_OUTPUT || MAX_REG_OUTPUT - register_idx < register_count)) ++ { ++ WARN("%s signature element %u unhandled register index %u, count %u.\n", ++ name, i, register_idx, register_count); ++ vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS, ++ "%s signature element %u register index %u, count %u exceeds maximum index of %u.", name, ++ i, register_idx, register_count, MAX_REG_OUTPUT - 1); ++ return false; ++ } ++ ++ if (!vkd3d_bitmask_is_contiguous(mask = signature->elements[i].mask)) ++ { ++ WARN("%s signature element %u mask %#x is not contiguous.\n", name, i, mask); ++ vkd3d_shader_parser_warning(&sm4->p, VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS, ++ "%s signature element %u mask %#x is not contiguous.", name, i, mask); ++ } ++ ++ if (register_idx != ~0u) ++ masks[register_idx] |= mask; ++ } ++ ++ return true; ++} ++ ++static int index_range_compare(const void *a, const void *b) ++{ ++ return memcmp(a, b, sizeof(struct sm4_index_range)); ++} ++ ++static void shader_sm4_validate_default_phase_index_ranges(struct vkd3d_shader_sm4_parser *sm4) ++{ ++ if (!sm4->input_index_ranges.count || !sm4->output_index_ranges.count) ++ return; ++ ++ if (sm4->input_index_ranges.count == sm4->output_index_ranges.count) ++ { ++ qsort(sm4->input_index_ranges.ranges, sm4->input_index_ranges.count, sizeof(sm4->input_index_ranges.ranges[0]), ++ index_range_compare); ++ qsort(sm4->output_index_ranges.ranges, sm4->output_index_ranges.count, sizeof(sm4->output_index_ranges.ranges[0]), ++ index_range_compare); ++ if (!memcmp(sm4->input_index_ranges.ranges, sm4->output_index_ranges.ranges, ++ sm4->input_index_ranges.count * sizeof(sm4->input_index_ranges.ranges[0]))) ++ return; ++ } ++ ++ /* This is very unlikely to occur and would complicate the default control point phase implementation. 
*/ ++ WARN("Default phase index ranges are not identical.\n"); ++ vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, ++ "Default control point phase input and output index range declarations are not identical."); ++ return; ++} ++ ++int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) ++{ ++ struct vkd3d_shader_instruction_array *instructions; ++ struct vkd3d_shader_desc *shader_desc; ++ struct vkd3d_shader_instruction *ins; ++ struct vkd3d_shader_sm4_parser *sm4; ++ int ret; ++ ++ if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) ++ { ++ ERR("Failed to allocate parser.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ shader_desc = &sm4->p.shader_desc; ++ if ((ret = shader_extract_from_dxbc(&compile_info->source, ++ message_context, compile_info->source_name, shader_desc)) < 0) ++ { ++ WARN("Failed to extract shader, vkd3d result %d.\n", ret); ++ vkd3d_free(sm4); ++ return ret; ++ } ++ ++ if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, ++ compile_info->source_name, &shader_desc->output_signature, message_context)) ++ { ++ WARN("Failed to initialise shader parser.\n"); ++ free_shader_desc(shader_desc); ++ vkd3d_free(sm4); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } ++ ++ if (!shader_sm4_parser_validate_signature(sm4, &shader_desc->input_signature, ++ sm4->input_register_masks, "Input") ++ || !shader_sm4_parser_validate_signature(sm4, &shader_desc->output_signature, ++ sm4->output_register_masks, "Output") ++ || !shader_sm4_parser_validate_signature(sm4, &shader_desc->patch_constant_signature, ++ sm4->patch_constant_register_masks, "Patch constant")) ++ { ++ shader_sm4_destroy(&sm4->p); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ instructions = &sm4->p.instructions; ++ while (sm4->ptr != sm4->end) ++ { ++ if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) ++ { ++ ERR("Failed to allocate instructions.\n"); ++ vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); ++ shader_sm4_destroy(&sm4->p); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ins = &instructions->elements[instructions->count]; ++ shader_sm4_read_instruction(sm4, ins); ++ ++ if (ins->handler_idx == VKD3DSIH_INVALID) ++ { ++ WARN("Encountered unrecognized or invalid instruction.\n"); ++ shader_sm4_destroy(&sm4->p); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++instructions->count; ++ } ++ if (sm4->p.shader_version.type == VKD3D_SHADER_TYPE_HULL && !sm4->has_control_point_phase && !sm4->p.failed) ++ shader_sm4_validate_default_phase_index_ranges(sm4); ++ ++ *parser = &sm4->p; ++ ++ return sm4->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; ++} ++ ++static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); ++ ++static bool type_is_integer(const struct hlsl_type *type) ++{ ++ switch (type->base_type) ++ { ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, ++ bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) ++{ ++ unsigned int i; ++ ++ static const struct ++ { ++ const char *semantic; ++ bool output; ++ enum vkd3d_shader_type shader_type; ++ enum vkd3d_sm4_swizzle_type swizzle_type; ++ enum vkd3d_sm4_register_type type; ++ bool has_idx; ++ } ++ register_table[] = ++ { ++ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, ++ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, ++ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, ++ ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, ++ ++ /* Put sv_target in this table, instead of letting it fall through to ++ * default varying allocation, so that the register index matches the ++ * usage index. */ ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, ++ }; ++ ++ for (i = 0; i < ARRAY_SIZE(register_table); ++i) ++ { ++ if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) ++ && output == register_table[i].output ++ && ctx->profile->type == register_table[i].shader_type) ++ { ++ *type = register_table[i].type; ++ if (swizzle_type) ++ *swizzle_type = register_table[i].swizzle_type; ++ *has_idx = register_table[i].has_idx; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, ++ bool output, D3D_NAME *usage) ++{ ++ unsigned int i; ++ ++ static const struct ++ { ++ const char *name; ++ bool output; ++ enum vkd3d_shader_type shader_type; ++ D3DDECLUSAGE usage; ++ } ++ semantics[] = ++ { ++ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, ++ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, ++ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, ++ ++ {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, ++ {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, ++ ++ {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, ++ {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, ++ ++ {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, ++ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, ++ {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_IS_FRONT_FACE}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 
D3D_NAME_TARGET}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, ++ ++ {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, ++ {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, ++ ++ {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, ++ }; ++ ++ for (i = 0; i < ARRAY_SIZE(semantics); ++i) ++ { ++ if (!ascii_strcasecmp(semantic->name, semantics[i].name) ++ && output == semantics[i].output ++ && ctx->profile->type == semantics[i].shader_type ++ && !ascii_strncasecmp(semantic->name, "sv_", 3)) ++ { ++ *usage = semantics[i].usage; ++ return true; ++ } ++ } ++ ++ if (!ascii_strncasecmp(semantic->name, "sv_", 3)) ++ return false; ++ ++ *usage = D3D_NAME_UNDEFINED; ++ return true; ++} ++ ++static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) ++{ ++ /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN ++ * sections to be aligned. Without this, the sections themselves will be ++ * aligned, but their reported sizes won't. */ ++ size_t size = bytecode_align(buffer); ++ ++ dxbc_writer_add_section(dxbc, tag, buffer->data, size); ++} ++ ++static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) ++{ ++ struct vkd3d_bytecode_buffer buffer = {0}; ++ struct vkd3d_string_buffer *string; ++ const struct hlsl_ir_var *var; ++ size_t count_position; ++ unsigned int i; ++ bool ret; ++ ++ count_position = put_u32(&buffer, 0); ++ put_u32(&buffer, 8); /* unknown */ ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; ++ enum vkd3d_sm4_register_type type; ++ uint32_t usage_idx, reg_idx; ++ D3D_NAME usage; ++ bool has_idx; ++ ++ if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) ++ continue; ++ ++ ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); ++ assert(ret); ++ if (usage == ~0u) ++ continue; ++ usage_idx = var->semantic.index; ++ ++ if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) ++ { ++ reg_idx = has_idx ? var->semantic.index : ~0u; ++ } ++ else ++ { ++ assert(var->regs[HLSL_REGSET_NUMERIC].allocated); ++ type = VKD3D_SM4_RT_INPUT; ++ reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; ++ } ++ ++ use_mask = width; /* FIXME: accurately report use mask */ ++ if (output) ++ use_mask = 0xf ^ use_mask; ++ ++ /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). 
*/ ++ if (usage >= 64) ++ usage = 0; ++ ++ put_u32(&buffer, 0); /* name */ ++ put_u32(&buffer, usage_idx); ++ put_u32(&buffer, usage); ++ switch (var->data_type->base_type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); ++ break; ++ ++ case HLSL_TYPE_INT: ++ put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); ++ break; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_UINT: ++ put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); ++ break; ++ ++ default: ++ if ((string = hlsl_type_to_string(ctx, var->data_type))) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Invalid data type %s for semantic variable %s.", string->buffer, var->name); ++ hlsl_release_string_buffer(ctx, string); ++ put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); ++ } ++ put_u32(&buffer, reg_idx); ++ put_u32(&buffer, vkd3d_make_u16(width, use_mask)); ++ } ++ ++ i = 0; ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ const char *semantic = var->semantic.name; ++ size_t string_offset; ++ D3D_NAME usage; ++ ++ if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) ++ continue; ++ ++ hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); ++ if (usage == ~0u) ++ continue; ++ ++ if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) ++ string_offset = put_string(&buffer, "SV_Target"); ++ else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) ++ string_offset = put_string(&buffer, "SV_Depth"); ++ else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) ++ string_offset = put_string(&buffer, "SV_Position"); ++ else ++ string_offset = put_string(&buffer, semantic); ++ set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); ++ } ++ ++ set_u32(&buffer, count_position, i); ++ ++ add_section(dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); ++} ++ ++static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) ++{ ++ switch (type->class) ++ { ++ case HLSL_CLASS_ARRAY: ++ return sm4_class(type->e.array.type); ++ case HLSL_CLASS_MATRIX: ++ assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); ++ if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) ++ return D3D_SVC_MATRIX_COLUMNS; ++ else ++ return D3D_SVC_MATRIX_ROWS; ++ case HLSL_CLASS_OBJECT: ++ return D3D_SVC_OBJECT; ++ case HLSL_CLASS_SCALAR: ++ return D3D_SVC_SCALAR; ++ case HLSL_CLASS_STRUCT: ++ return D3D_SVC_STRUCT; ++ case HLSL_CLASS_VECTOR: ++ return D3D_SVC_VECTOR; ++ default: ++ ERR("Invalid class %#x.\n", type->class); ++ vkd3d_unreachable(); ++ } ++} ++ ++static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) ++{ ++ switch (type->base_type) ++ { ++ case HLSL_TYPE_BOOL: ++ return D3D_SVT_BOOL; ++ case HLSL_TYPE_DOUBLE: ++ return D3D_SVT_DOUBLE; ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ return D3D_SVT_FLOAT; ++ case HLSL_TYPE_INT: ++ return D3D_SVT_INT; ++ case HLSL_TYPE_PIXELSHADER: ++ return D3D_SVT_PIXELSHADER; ++ case HLSL_TYPE_SAMPLER: ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return D3D_SVT_SAMPLER1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return D3D_SVT_SAMPLER2D; ++ case HLSL_SAMPLER_DIM_3D: ++ return D3D_SVT_SAMPLER3D; ++ case HLSL_SAMPLER_DIM_CUBE: ++ return D3D_SVT_SAMPLERCUBE; ++ case HLSL_SAMPLER_DIM_GENERIC: ++ return D3D_SVT_SAMPLER; ++ default: ++ vkd3d_unreachable(); ++ } ++ break; ++ case HLSL_TYPE_STRING: ++ return D3D_SVT_STRING; ++ case HLSL_TYPE_TEXTURE: ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return D3D_SVT_TEXTURE1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return D3D_SVT_TEXTURE2D; ++ case HLSL_SAMPLER_DIM_2DMS: ++ return D3D_SVT_TEXTURE2DMS; ++ case HLSL_SAMPLER_DIM_3D: ++ return D3D_SVT_TEXTURE3D; ++ case HLSL_SAMPLER_DIM_CUBE: ++ return D3D_SVT_TEXTURECUBE; ++ case HLSL_SAMPLER_DIM_GENERIC: ++ return D3D_SVT_TEXTURE; ++ default: ++ vkd3d_unreachable(); ++ } ++ break; ++ case HLSL_TYPE_UINT: ++ return D3D_SVT_UINT; ++ case HLSL_TYPE_VERTEXSHADER: ++ return D3D_SVT_VERTEXSHADER; ++ case HLSL_TYPE_VOID: ++ return D3D_SVT_VOID; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) ++{ ++ const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); ++ const char *name = array_type->name ? 
array_type->name : ""; ++ const struct hlsl_profile_info *profile = ctx->profile; ++ unsigned int field_count = 0, array_size = 0; ++ size_t fields_offset = 0, name_offset = 0; ++ size_t i; ++ ++ if (type->bytecode_offset) ++ return; ++ ++ if (profile->major_version >= 5) ++ name_offset = put_string(buffer, name); ++ ++ if (type->class == HLSL_CLASS_ARRAY) ++ array_size = hlsl_get_multiarray_size(type); ++ ++ if (array_type->class == HLSL_CLASS_STRUCT) ++ { ++ field_count = array_type->e.record.field_count; ++ ++ for (i = 0; i < field_count; ++i) ++ { ++ struct hlsl_struct_field *field = &array_type->e.record.fields[i]; ++ ++ field->name_bytecode_offset = put_string(buffer, field->name); ++ write_sm4_type(ctx, buffer, field->type); ++ } ++ ++ fields_offset = bytecode_align(buffer); ++ ++ for (i = 0; i < field_count; ++i) ++ { ++ struct hlsl_struct_field *field = &array_type->e.record.fields[i]; ++ ++ put_u32(buffer, field->name_bytecode_offset); ++ put_u32(buffer, field->type->bytecode_offset); ++ put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); ++ } ++ } ++ ++ type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); ++ put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); ++ put_u32(buffer, vkd3d_make_u32(array_size, field_count)); ++ put_u32(buffer, fields_offset); ++ ++ if (profile->major_version >= 5) ++ { ++ put_u32(buffer, 0); /* FIXME: unknown */ ++ put_u32(buffer, 0); /* FIXME: unknown */ ++ put_u32(buffer, 0); /* FIXME: unknown */ ++ put_u32(buffer, 0); /* FIXME: unknown */ ++ put_u32(buffer, name_offset); ++ } ++} ++ ++static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) ++{ ++ if (type->class == HLSL_CLASS_ARRAY) ++ return sm4_resource_type(type->e.array.type); ++ ++ switch (type->base_type) ++ { ++ case HLSL_TYPE_SAMPLER: ++ return D3D_SIT_SAMPLER; ++ case HLSL_TYPE_TEXTURE: ++ return D3D_SIT_TEXTURE; ++ case HLSL_TYPE_UAV: ++ return D3D_SIT_UAV_RWTYPED; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) ++{ ++ if (type->class == HLSL_CLASS_ARRAY) ++ return sm4_resource_format(type->e.array.type); ++ ++ switch (type->e.resource_format->base_type) ++ { ++ case HLSL_TYPE_DOUBLE: ++ return D3D_RETURN_TYPE_DOUBLE; ++ ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ return D3D_RETURN_TYPE_FLOAT; ++ ++ case HLSL_TYPE_INT: ++ return D3D_RETURN_TYPE_SINT; ++ break; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_UINT: ++ return D3D_RETURN_TYPE_UINT; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) ++{ ++ if (type->class == HLSL_CLASS_ARRAY) ++ return sm4_rdef_resource_dimension(type->e.array.type); ++ ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return D3D_SRV_DIMENSION_TEXTURE1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return D3D_SRV_DIMENSION_TEXTURE2D; ++ case HLSL_SAMPLER_DIM_3D: ++ return D3D_SRV_DIMENSION_TEXTURE3D; ++ case HLSL_SAMPLER_DIM_CUBE: ++ return D3D_SRV_DIMENSION_TEXTURECUBE; ++ case HLSL_SAMPLER_DIM_1DARRAY: ++ return D3D_SRV_DIMENSION_TEXTURE1DARRAY; ++ case HLSL_SAMPLER_DIM_2DARRAY: ++ return D3D_SRV_DIMENSION_TEXTURE2DARRAY; ++ case HLSL_SAMPLER_DIM_2DMS: ++ return D3D_SRV_DIMENSION_TEXTURE2DMS; ++ case HLSL_SAMPLER_DIM_2DMSARRAY: ++ return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; ++ case HLSL_SAMPLER_DIM_CUBEARRAY: ++ return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; ++ case HLSL_SAMPLER_DIM_BUFFER: ++ case 
HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: ++ return D3D_SRV_DIMENSION_BUFFER; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static int sm4_compare_extern_resources(const void *a, const void *b) ++{ ++ const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; ++ const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; ++ enum hlsl_regset aa_regset, bb_regset; ++ ++ aa_regset = hlsl_type_get_regset(aa->data_type); ++ bb_regset = hlsl_type_get_regset(bb->data_type); ++ ++ if (aa_regset != bb_regset) ++ return aa_regset - bb_regset; ++ ++ return aa->regs[aa_regset].id - bb->regs[bb_regset].id; ++} ++ ++static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) ++{ ++ const struct hlsl_ir_var **extern_resources = NULL; ++ const struct hlsl_ir_var *var; ++ enum hlsl_regset regset; ++ size_t capacity = 0; ++ ++ *count = 0; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (!hlsl_type_is_resource(var->data_type)) ++ continue; ++ regset = hlsl_type_get_regset(var->data_type); ++ if (!var->regs[regset].allocated) ++ continue; ++ ++ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, ++ sizeof(*extern_resources)))) ++ { ++ *count = 0; ++ return NULL; ++ } ++ ++ extern_resources[*count] = var; ++ ++*count; ++ } ++ ++ qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); ++ return extern_resources; ++} ++ ++static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) ++{ ++ unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; ++ size_t cbuffers_offset, resources_offset, creator_offset, string_offset; ++ size_t cbuffer_position, resource_position, creator_position; ++ const struct hlsl_profile_info *profile = ctx->profile; ++ const struct hlsl_ir_var **extern_resources; ++ struct vkd3d_bytecode_buffer buffer = {0}; ++ const struct hlsl_buffer *cbuffer; ++ const struct hlsl_ir_var *var; ++ ++ static const uint16_t target_types[] = ++ { ++ 0xffff, /* PIXEL */ ++ 0xfffe, /* VERTEX */ ++ 0x4753, /* GEOMETRY */ ++ 0x4853, /* HULL */ ++ 0x4453, /* DOMAIN */ ++ 0x4353, /* COMPUTE */ ++ }; ++ ++ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); ++ ++ resource_count += extern_resources_count; ++ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) ++ { ++ if (cbuffer->reg.allocated) ++ { ++ ++cbuffer_count; ++ ++resource_count; ++ } ++ } ++ ++ put_u32(&buffer, cbuffer_count); ++ cbuffer_position = put_u32(&buffer, 0); ++ put_u32(&buffer, resource_count); ++ resource_position = put_u32(&buffer, 0); ++ put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), ++ target_types[profile->type])); ++ put_u32(&buffer, 0); /* FIXME: compilation flags */ ++ creator_position = put_u32(&buffer, 0); ++ ++ if (profile->major_version >= 5) ++ { ++ put_u32(&buffer, TAG_RD11); ++ put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ ++ put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ ++ put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ ++ put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ ++ put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ ++ put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ ++ put_u32(&buffer, 0); /* unknown; possibly a null terminator */ ++ } ++ ++ /* Bound resources. 
*/ ++ ++ resources_offset = bytecode_align(&buffer); ++ set_u32(&buffer, resource_position, resources_offset); ++ ++ for (i = 0; i < extern_resources_count; ++i) ++ { ++ enum hlsl_regset regset; ++ uint32_t flags = 0; ++ ++ var = extern_resources[i]; ++ regset = hlsl_type_get_regset(var->data_type); ++ ++ if (var->reg_reservation.reg_type) ++ flags |= D3D_SIF_USERPACKED; ++ ++ put_u32(&buffer, 0); /* name */ ++ put_u32(&buffer, sm4_resource_type(var->data_type)); ++ if (regset == HLSL_REGSET_SAMPLERS) ++ { ++ put_u32(&buffer, 0); ++ put_u32(&buffer, 0); ++ put_u32(&buffer, 0); ++ } ++ else ++ { ++ unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; ++ ++ put_u32(&buffer, sm4_resource_format(var->data_type)); ++ put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); ++ put_u32(&buffer, ~0u); /* FIXME: multisample count */ ++ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; ++ } ++ put_u32(&buffer, var->regs[regset].id); ++ put_u32(&buffer, var->regs[regset].bind_count); ++ put_u32(&buffer, flags); ++ } ++ ++ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) ++ { ++ uint32_t flags = 0; ++ ++ if (!cbuffer->reg.allocated) ++ continue; ++ ++ if (cbuffer->reservation.reg_type) ++ flags |= D3D_SIF_USERPACKED; ++ ++ put_u32(&buffer, 0); /* name */ ++ put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); ++ put_u32(&buffer, 0); /* return type */ ++ put_u32(&buffer, 0); /* dimension */ ++ put_u32(&buffer, 0); /* multisample count */ ++ put_u32(&buffer, cbuffer->reg.id); /* bind point */ ++ put_u32(&buffer, 1); /* bind count */ ++ put_u32(&buffer, flags); /* flags */ ++ } ++ ++ for (i = 0; i < extern_resources_count; ++i) ++ { ++ var = extern_resources[i]; ++ ++ string_offset = put_string(&buffer, var->name); ++ set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); ++ } ++ ++ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) ++ { ++ if (!cbuffer->reg.allocated) ++ continue; ++ ++ string_offset = put_string(&buffer, cbuffer->name); ++ set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); ++ } ++ ++ /* Buffers. */ ++ ++ cbuffers_offset = bytecode_align(&buffer); ++ set_u32(&buffer, cbuffer_position, cbuffers_offset); ++ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) ++ { ++ unsigned int var_count = 0; ++ ++ if (!cbuffer->reg.allocated) ++ continue; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_uniform && var->buffer == cbuffer) ++ ++var_count; ++ } ++ ++ put_u32(&buffer, 0); /* name */ ++ put_u32(&buffer, var_count); ++ put_u32(&buffer, 0); /* variable offset */ ++ put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); ++ put_u32(&buffer, 0); /* FIXME: flags */ ++ put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
++
++    i = 0;
++    LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
++    {
++        if (!cbuffer->reg.allocated)
++            continue;
++
++        string_offset = put_string(&buffer, cbuffer->name);
++        set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset);
++    }
++
++    i = 0;
++    LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
++    {
++        size_t vars_start = bytecode_align(&buffer);
++
++        if (!cbuffer->reg.allocated)
++            continue;
++
++        set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start);
++
++        LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
++        {
++            if (var->is_uniform && var->buffer == cbuffer)
++            {
++                uint32_t flags = 0;
++
++                if (var->last_read)
++                    flags |= D3D_SVF_USED;
++
++                put_u32(&buffer, 0); /* name */
++                put_u32(&buffer, var->buffer_offset * sizeof(float));
++                put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float));
++                put_u32(&buffer, flags);
++                put_u32(&buffer, 0); /* type */
++                put_u32(&buffer, 0); /* FIXME: default value */
++
++                if (profile->major_version >= 5)
++                {
++                    put_u32(&buffer, 0); /* texture start */
++                    put_u32(&buffer, 0); /* texture count */
++                    put_u32(&buffer, 0); /* sampler start */
++                    put_u32(&buffer, 0); /* sampler count */
++                }
++            }
++        }
++
++        j = 0;
++        LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
++        {
++            if (var->is_uniform && var->buffer == cbuffer)
++            {
++                const unsigned int var_size = (profile->major_version >= 5 ? 10 : 6);
++                size_t var_offset = vars_start + j * var_size * sizeof(uint32_t);
++                size_t string_offset = put_string(&buffer, var->name);
++
++                set_u32(&buffer, var_offset, string_offset);
++                write_sm4_type(ctx, &buffer, var->data_type);
++                set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset);
++                ++j;
++            }
++        }
++    }
++
++    creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL));
++    set_u32(&buffer, creator_position, creator_offset);
++
++    add_section(dxbc, TAG_RDEF, &buffer);
++
++    vkd3d_free(extern_resources);
++}
++
++static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type)
++{
++    switch (type->sampler_dim)
++    {
++        case HLSL_SAMPLER_DIM_1D:
++            return VKD3D_SM4_RESOURCE_TEXTURE_1D;
++        case HLSL_SAMPLER_DIM_2D:
++            return VKD3D_SM4_RESOURCE_TEXTURE_2D;
++        case HLSL_SAMPLER_DIM_3D:
++            return VKD3D_SM4_RESOURCE_TEXTURE_3D;
++        case HLSL_SAMPLER_DIM_CUBE:
++            return VKD3D_SM4_RESOURCE_TEXTURE_CUBE;
++        case HLSL_SAMPLER_DIM_1DARRAY:
++            return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY;
++        case HLSL_SAMPLER_DIM_2DARRAY:
++            return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY;
++        case HLSL_SAMPLER_DIM_2DMS:
++            return VKD3D_SM4_RESOURCE_TEXTURE_2DMS;
++        case HLSL_SAMPLER_DIM_2DMSARRAY:
++            return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY;
++        case HLSL_SAMPLER_DIM_CUBEARRAY:
++            return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY;
++        case HLSL_SAMPLER_DIM_BUFFER:
++        case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
++            return VKD3D_SM4_RESOURCE_BUFFER;
++        default:
++            vkd3d_unreachable();
++    }
++}
++
++struct sm4_instruction_modifier
++{
++    enum vkd3d_sm4_instruction_modifier type;
++
++    union
++    {
++        struct
++        {
++            int u, v, w;
++        } aoffimmi;
++    } u;
++};
++
++static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod)
++{
++    uint32_t word = 0;
++
++    word |= VKD3D_SM4_MODIFIER_MASK & imod->type;
++
++    switch (imod->type)
++    {
++        case VKD3D_SM4_MODIFIER_AOFFIMMI:
++            assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7);
++            assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7);
++            assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7);
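++            /* Each offset is packed as a 4-bit two's-complement nibble; an
++             * offset of (1, -2, 0), for example, encodes as 0x1, 0xe and 0x0
++             * in the u, v and w fields respectively. */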
++            word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT;
++            word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT;
++            word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT;
++            break;
++
++        default:
++            vkd3d_unreachable();
++    }
++
++    return word;
++}
++
++struct sm4_register
++{
++    enum vkd3d_sm4_register_type type;
++    uint32_t idx[2];
++    unsigned int idx_count;
++    enum vkd3d_sm4_dimension dim;
++    uint32_t immconst_uint[4];
++    unsigned int mod;
++};
++
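++/* An instruction in unencoded form; write_sm4_instruction() serialises one of
++ * these into opcode, modifier, operand and immediate tokens. */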
++struct sm4_instruction
++{
++    enum vkd3d_sm4_opcode opcode;
++
++    struct sm4_instruction_modifier modifiers[1];
++    unsigned int modifier_count;
++
++    struct sm4_dst_register
++    {
++        struct sm4_register reg;
++        unsigned int writemask;
++    } dsts[2];
++    unsigned int dst_count;
++
++    struct sm4_src_register
++    {
++        struct sm4_register reg;
++        enum vkd3d_sm4_swizzle_type swizzle_type;
++        unsigned int swizzle;
++    } srcs[5];
++    unsigned int src_count;
++
++    unsigned int byte_stride;
++
++    uint32_t idx[3];
++    unsigned int idx_count;
++};
++
++static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg,
++        unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type,
++        const struct hlsl_deref *deref, const struct hlsl_type *data_type)
++{
++    const struct hlsl_ir_var *var = deref->var;
++
++    if (var->is_uniform)
++    {
++        enum hlsl_regset regset = hlsl_type_get_regset(data_type);
++
++        if (regset == HLSL_REGSET_TEXTURES)
++        {
++            reg->type = VKD3D_SM4_RT_RESOURCE;
++            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
++            if (swizzle_type)
++                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
++            reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id;
++            reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref);
++            assert(deref->offset_regset == HLSL_REGSET_TEXTURES);
++            reg->idx_count = 1;
++            *writemask = VKD3DSP_WRITEMASK_ALL;
++        }
++        else if (regset == HLSL_REGSET_UAVS)
++        {
++            reg->type = VKD3D_SM5_RT_UAV;
++            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
++            if (swizzle_type)
++                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
++            reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id;
++            reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref);
++            assert(deref->offset_regset == HLSL_REGSET_UAVS);
++            reg->idx_count = 1;
++            *writemask = VKD3DSP_WRITEMASK_ALL;
++        }
++        else if (regset == HLSL_REGSET_SAMPLERS)
++        {
++            reg->type = VKD3D_SM4_RT_SAMPLER;
++            reg->dim = VKD3D_SM4_DIMENSION_NONE;
++            if (swizzle_type)
++                *swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
++            reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id;
++            reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref);
++            assert(deref->offset_regset == HLSL_REGSET_SAMPLERS);
++            reg->idx_count = 1;
++            *writemask = VKD3DSP_WRITEMASK_ALL;
++        }
++        else
++        {
++            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;
++
++            assert(data_type->class <= HLSL_CLASS_VECTOR);
++            reg->type = VKD3D_SM4_RT_CONSTBUFFER;
++            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
++            if (swizzle_type)
++                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
++            reg->idx[0] = var->buffer->reg.id;
++            reg->idx[1] = offset / 4;
++            reg->idx_count = 2;
++            *writemask = ((1u << data_type->dimx) - 1) << (offset & 3);
++        }
++    }
++    else if (var->is_input_semantic)
++    {
++        bool has_idx;
++
++        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, &reg->type, swizzle_type, &has_idx))
++        {
++            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
++
++            if (has_idx)
++            {
++                reg->idx[0] = var->semantic.index + offset / 4;
++                reg->idx_count = 1;
++            }
++
++            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
++            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
++        }
++        else
++        {
++            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
++
++            assert(hlsl_reg.allocated);
++            reg->type = VKD3D_SM4_RT_INPUT;
++            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
++            if (swizzle_type)
++                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
++            reg->idx[0] = hlsl_reg.id;
++            reg->idx_count = 1;
++            *writemask = hlsl_reg.writemask;
++        }
++    }
++    else if (var->is_output_semantic)
++    {
++        bool has_idx;
++
++        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, &reg->type, swizzle_type, &has_idx))
++        {
++            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
++
++            if (has_idx)
++            {
++                reg->idx[0] = var->semantic.index + offset / 4;
++                reg->idx_count = 1;
++            }
++
++            if (reg->type == VKD3D_SM4_RT_DEPTHOUT)
++                reg->dim = VKD3D_SM4_DIMENSION_SCALAR;
++            else
++                reg->dim = VKD3D_SM4_DIMENSION_VEC4;
++            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
++        }
++        else
++        {
++            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
++
++            assert(hlsl_reg.allocated);
++            reg->type = VKD3D_SM4_RT_OUTPUT;
++            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
++            reg->idx[0] = hlsl_reg.id;
++            reg->idx_count = 1;
++            *writemask = hlsl_reg.writemask;
++        }
++    }
++    else
++    {
++        struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
++
++        assert(hlsl_reg.allocated);
++        reg->type = VKD3D_SM4_RT_TEMP;
++        reg->dim = VKD3D_SM4_DIMENSION_VEC4;
++        if (swizzle_type)
++            *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
++        reg->idx[0] = hlsl_reg.id;
++        reg->idx_count = 1;
++        *writemask = hlsl_reg.writemask;
++    }
++}
++
++static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src,
++        const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask)
++{
++    unsigned int writemask;
++
++    sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type);
++    if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4)
++        src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
++}
++
++static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask,
++        enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr)
++{
++    assert(instr->reg.allocated);
++    reg->type = VKD3D_SM4_RT_TEMP;
++    reg->dim = VKD3D_SM4_DIMENSION_VEC4;
++    *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
++    reg->idx[0] = instr->reg.id;
++    reg->idx_count = 1;
++    *writemask = instr->reg.writemask;
++}
++
++static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct hlsl_ir_node *instr)
++{
++    enum vkd3d_sm4_swizzle_type swizzle_type;
++
++    sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, instr);
++}
++
++static void sm4_src_from_node(struct sm4_src_register *src,
++        const struct hlsl_ir_node *instr, unsigned int map_writemask)
++{
++    unsigned int writemask;
++
++    sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr);
++    if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4)
++        src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
++}
++
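++/* Immediate operands carry their payload inline: a scalar immediate takes one
++ * extra DWORD and a vec4 immediate takes four. Only the components selected by
++ * the consumer's writemask are populated. */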
++static void sm4_src_from_constant_value(struct sm4_src_register *src,
++        const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask)
++{
++    src->swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
++    src->reg.type = VKD3D_SM4_RT_IMMCONST;
++    if (width == 1)
++    {
++        src->reg.dim = VKD3D_SM4_DIMENSION_SCALAR;
++        src->reg.immconst_uint[0] = value->u[0].u;
++    }
++    else
++    {
++        unsigned int i, j = 0;
++
++        src->reg.dim = VKD3D_SM4_DIMENSION_VEC4;
++        for (i = 0; i < 4; ++i)
++        {
++            if (map_writemask & (1u << i))
++                src->reg.immconst_uint[i] = value->u[j++].u;
++        }
++    }
++}
++
++static uint32_t sm4_encode_register(const struct sm4_register *reg)
++{
++    return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT)
++            | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT)
++            | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT);
++}
++
++static uint32_t sm4_register_order(const struct sm4_register *reg)
++{
++    uint32_t order = 1;
++    if (reg->type == VKD3D_SM4_RT_IMMCONST)
++        order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 4 : 1;
++    order += reg->idx_count;
++    if (reg->mod)
++        ++order;
++    return order;
++}
++
++static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr)
++{
++    uint32_t token = instr->opcode;
++    unsigned int size = 1, i, j;
++
++    size += instr->modifier_count;
++    for (i = 0; i < instr->dst_count; ++i)
++        size += sm4_register_order(&instr->dsts[i].reg);
++    for (i = 0; i < instr->src_count; ++i)
++        size += sm4_register_order(&instr->srcs[i].reg);
++    size += instr->idx_count;
++    if (instr->byte_stride)
++        ++size;
++
++    token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT);
++
++    if (instr->modifier_count > 0)
++        token |= VKD3D_SM4_INSTRUCTION_MODIFIER;
++    put_u32(buffer, token);
++
++    for (i = 0; i < instr->modifier_count; ++i)
++    {
++        token = sm4_encode_instruction_modifier(&instr->modifiers[i]);
++        if (instr->modifier_count > i + 1)
++            token |= VKD3D_SM4_INSTRUCTION_MODIFIER;
++        put_u32(buffer, token);
++    }
++
++    for (i = 0; i < instr->dst_count; ++i)
++    {
++        token = sm4_encode_register(&instr->dsts[i].reg);
++        if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4)
++            token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT;
++        put_u32(buffer, token);
++
++        for (j = 0; j < instr->dsts[i].reg.idx_count; ++j)
++            put_u32(buffer, instr->dsts[i].reg.idx[j]);
++    }
++
++    for (i = 0; i < instr->src_count; ++i)
++    {
++        token = sm4_encode_register(&instr->srcs[i].reg);
++        token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT;
++        token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT;
++        if (instr->srcs[i].reg.mod)
++            token |= VKD3D_SM4_EXTENDED_OPERAND;
++        put_u32(buffer, token);
++
++        if (instr->srcs[i].reg.mod)
++            put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT)
++                    | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER);
++
++        for (j = 0; j < instr->srcs[i].reg.idx_count; ++j)
++            put_u32(buffer, instr->srcs[i].reg.idx[j]);
++
++        if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST)
++        {
++            put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]);
++            if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4)
++            {
++                put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]);
++                put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]);
++                put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]);
++            }
++        }
++    }
++
++    if (instr->byte_stride)
++        put_u32(buffer, instr->byte_stride);
++
++    for (j = 0; j < instr->idx_count; ++j)
++        put_u32(buffer, instr->idx[j]);
++}
++
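++/* A texel offset can only be encoded as an aoffimmi modifier when it resolves
++ * to a compile-time constant that fits the 4-bit signed fields; anything else
++ * is rejected here and diagnosed by the callers. */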
++static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr,
++        const struct hlsl_ir_node *texel_offset)
++{
++    struct sm4_instruction_modifier modif;
++    struct hlsl_ir_constant *offset;
++
++    if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT)
++        return false;
++    offset = hlsl_ir_constant(texel_offset);
++
++    modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI;
++    modif.u.aoffimmi.u = offset->value.u[0].i;
++    modif.u.aoffimmi.v = offset->value.u[1].i;
++    modif.u.aoffimmi.w = offset->value.u[2].i;
++    if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7
++            || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7
++            || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7)
++        return false;
++
++    instr->modifiers[instr->modifier_count++] = modif;
++    return true;
++}
++
++static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer)
++{
++    const struct sm4_instruction instr =
++    {
++        .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER,
++
++        .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4,
++        .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER,
++        .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4},
++        .srcs[0].reg.idx_count = 2,
++        .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4,
++        .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W),
++        .src_count = 1,
++    };
++    write_sm4_instruction(buffer, &instr);
++}
++
++static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var)
++{
++    unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS];
++    struct sm4_instruction instr;
++
++    for (i = 0; i < count; ++i)
++    {
++        if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used)
++            continue;
++
++        instr = (struct sm4_instruction)
++        {
++            .opcode = VKD3D_SM4_OP_DCL_SAMPLER,
++
++            .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER,
++            .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id + i},
++            .dsts[0].reg.idx_count = 1,
++            .dst_count = 1,
++        };
++
++        write_sm4_instruction(buffer, &instr);
++    }
++}
++
++static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
++        const struct hlsl_ir_var *var, bool uav)
++{
++    enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES;
++    unsigned int i, count = var->data_type->reg_size[regset];
++    struct hlsl_type *component_type;
++    struct sm4_instruction instr;
++
++    component_type = hlsl_type_get_component_type(ctx, var->data_type, 0);
++
++    for (i = 0; i < count; ++i)
++    {
++        if (!var->objects_usage[regset][i].used)
++            continue;
++
++        instr = (struct sm4_instruction)
++        {
++            .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE,
++            .dsts[0].reg.idx = {var->regs[regset].id + i},
++            .dsts[0].reg.idx_count = 1,
++            .dst_count = 1,
++
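++            /* Multiplying by 0x1111 replicates the format code into all four
++             * component fields of the declaration's return-type token, i.e.
++             * the same format is declared for .x, .y, .z and .w. */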
++            .idx[0] = sm4_resource_format(component_type) * 0x1111,
++            .idx_count = 1,
++        };
++
++        if (uav)
++        {
++            switch (var->data_type->sampler_dim)
++            {
++                case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
++                    instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED;
++                    instr.byte_stride = var->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4;
++                    break;
++                default:
++                    instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED;
++                    break;
++            }
++        }
++        else
++        {
++            instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE;
++        }
++        instr.opcode |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT);
++
++        if (component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS
++                || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY)
++        {
++            instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT;
++        }
++
++        write_sm4_instruction(buffer, &instr);
++    }
++}
++
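++/* A semantic maps either to a dedicated register (e.g. oDepth) or to a
++ * numbered input/output register; system-value semantics additionally select
++ * the _SGV or _SIV declaration opcode variants below. */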
++static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var)
++{
++    const struct hlsl_profile_info *profile = ctx->profile;
++    const bool output = var->is_output_semantic;
++    D3D_NAME usage;
++    bool has_idx;
++
++    struct sm4_instruction instr =
++    {
++        .dsts[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4,
++        .dst_count = 1,
++    };
++
++    if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx))
++    {
++        if (has_idx)
++        {
++            instr.dsts[0].reg.idx[0] = var->semantic.index;
++            instr.dsts[0].reg.idx_count = 1;
++        }
++        else
++        {
++            instr.dsts[0].reg.idx_count = 0;
++        }
++        instr.dsts[0].writemask = (1 << var->data_type->dimx) - 1;
++    }
++    else
++    {
++        instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT;
++        instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id;
++        instr.dsts[0].reg.idx_count = 1;
++        instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask;
++    }
++
++    if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT)
++        instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR;
++
++    hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage);
++    if (usage == ~0u)
++        usage = D3D_NAME_UNDEFINED;
++
++    if (var->is_input_semantic)
++    {
++        switch (usage)
++        {
++            case D3D_NAME_UNDEFINED:
++                instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL)
++                        ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT;
++                break;
++
++            case D3D_NAME_INSTANCE_ID:
++            case D3D_NAME_PRIMITIVE_ID:
++            case D3D_NAME_VERTEX_ID:
++                instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL)
++                        ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV;
++                break;
++
++            default:
++                instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL)
++                        ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV;
++                break;
++        }
++
++        if (profile->type == VKD3D_SHADER_TYPE_PIXEL)
++        {
++            enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR;
++
++            if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type))
++                mode = VKD3DSIM_CONSTANT;
++
++            instr.opcode |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT;
++        }
++    }
++    else
++    {
++        if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL)
++            instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT;
++        else
++            instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV;
++    }
++
++    switch (usage)
++    {
++        case D3D_NAME_COVERAGE:
++        case D3D_NAME_DEPTH:
++        case D3D_NAME_DEPTH_GREATER_EQUAL:
++        case D3D_NAME_DEPTH_LESS_EQUAL:
++        case D3D_NAME_TARGET:
++        case D3D_NAME_UNDEFINED:
++            break;
++
++        default:
++            instr.idx_count = 1;
++            instr.idx[0] = usage;
++            break;
++    }
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count)
++{
++    struct sm4_instruction instr =
++    {
++        .opcode = VKD3D_SM4_OP_DCL_TEMPS,
++
++        .idx = {temp_count},
++        .idx_count = 1,
++    };
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3])
++{
++    struct sm4_instruction instr =
++    {
++        .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP,
++
++        .idx = {thread_count[0], thread_count[1], thread_count[2]},
++        .idx_count = 3,
++    };
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer)
++{
++    struct sm4_instruction instr =
++    {
++        .opcode = VKD3D_SM4_OP_RET,
++    };
++
++    write_sm4_instruction(buffer, &instr);
++}
++
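++/* The helpers below swizzle each source so that its components line up with
++ * the destination writemask; dot products are the exception (see
++ * write_sm4_binary_op_dot() below). */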
++static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode,
++        const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod)
++{
++    struct sm4_instruction instr;
++
++    memset(&instr, 0, sizeof(instr));
++    instr.opcode = opcode;
++
++    sm4_dst_from_node(&instr.dsts[0], dst);
++    instr.dst_count = 1;
++
++    sm4_src_from_node(&instr.srcs[0], src, instr.dsts[0].writemask);
++    instr.srcs[0].reg.mod = src_mod;
++    instr.src_count = 1;
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer,
++        enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx,
++        const struct hlsl_ir_node *src)
++{
++    struct sm4_instruction instr;
++
++    memset(&instr, 0, sizeof(instr));
++    instr.opcode = opcode;
++
++    assert(dst_idx < ARRAY_SIZE(instr.dsts));
++    sm4_dst_from_node(&instr.dsts[dst_idx], dst);
++    assert(1 - dst_idx >= 0);
++    instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL;
++    instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE;
++    instr.dsts[1 - dst_idx].reg.idx_count = 0;
++    instr.dst_count = 2;
++
++    sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask);
++    instr.src_count = 1;
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode,
++        const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
++{
++    struct sm4_instruction instr;
++
++    memset(&instr, 0, sizeof(instr));
++    instr.opcode = opcode;
++
++    sm4_dst_from_node(&instr.dsts[0], dst);
++    instr.dst_count = 1;
++
++    sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask);
++    sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask);
++    instr.src_count = 2;
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++/* dp# instructions don't map the swizzle. */
++static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode,
++        const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
++{
++    struct sm4_instruction instr;
++
++    memset(&instr, 0, sizeof(instr));
++    instr.opcode = opcode;
++
++    sm4_dst_from_node(&instr.dsts[0], dst);
++    instr.dst_count = 1;
++
++    sm4_src_from_node(&instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL);
++    sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL);
++    instr.src_count = 2;
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer,
++        enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx,
++        const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
++{
++    struct sm4_instruction instr;
++
++    memset(&instr, 0, sizeof(instr));
++    instr.opcode = opcode;
++
++    assert(dst_idx < ARRAY_SIZE(instr.dsts));
++    sm4_dst_from_node(&instr.dsts[dst_idx], dst);
++    assert(1 - dst_idx >= 0);
++    instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL;
++    instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE;
++    instr.dsts[1 - dst_idx].reg.idx_count = 0;
++    instr.dst_count = 2;
++
++    sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[dst_idx].writemask);
++    sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask);
++    instr.src_count = 2;
++
++    write_sm4_instruction(buffer, &instr);
++}
++
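++/* An IR constant is lowered to a mov from an immediate operand; only the
++ * components selected by the destination writemask carry meaningful values. */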
++static void write_sm4_constant(struct hlsl_ctx *ctx,
++        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_constant *constant)
++{
++    const unsigned int dimx = constant->node.data_type->dimx;
++    struct sm4_instruction instr;
++
++    memset(&instr, 0, sizeof(instr));
++    instr.opcode = VKD3D_SM4_OP_MOV;
++
++    sm4_dst_from_node(&instr.dsts[0], &constant->node);
++    instr.dst_count = 1;
++
++    sm4_src_from_constant_value(&instr.srcs[0], &constant->value, dimx, instr.dsts[0].writemask);
++    instr.src_count = 1;
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
++        const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst,
++        const struct hlsl_deref *resource, const struct hlsl_ir_node *coords,
++        const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset,
++        enum hlsl_sampler_dim dim)
++{
++    bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE
++            && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY);
++    bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS);
++    struct sm4_instruction instr;
++    unsigned int dim_count;
++
++    memset(&instr, 0, sizeof(instr));
++    if (uav)
++        instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED;
++    else
++        instr.opcode = multisampled ? VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD;
++
++    if (texel_offset)
++    {
++        if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
++        {
++            hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
++                    "Offset must resolve to integer literal in the range -8 to 7.");
++            return;
++        }
++    }
++
++    sm4_dst_from_node(&instr.dsts[0], dst);
++    instr.dst_count = 1;
++
++    sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
++
++    if (!uav)
++    {
++        /* Mipmap level is in the last component in the IR, but needs to be in the W
++         * component in the instruction. */
++        dim_count = hlsl_sampler_dim_count(dim);
++        if (dim_count == 1)
++            instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4);
++        if (dim_count == 2)
++            instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, Y, X, Z), 4);
++    }
++
++    sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask);
++
++    instr.src_count = 2;
++
++    if (multisampled)
++    {
++        if (sample_index->type == HLSL_IR_CONSTANT)
++        {
++            struct sm4_register *reg = &instr.srcs[2].reg;
++            struct hlsl_ir_constant *index;
++
++            index = hlsl_ir_constant(sample_index);
++
++            memset(&instr.srcs[2], 0, sizeof(instr.srcs[2]));
++            instr.srcs[2].swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
++            reg->type = VKD3D_SM4_RT_IMMCONST;
++            reg->dim = VKD3D_SM4_DIMENSION_SCALAR;
++            reg->immconst_uint[0] = index->value.u[0].u;
++        }
++        else if (ctx->profile->major_version == 4 && ctx->profile->minor_version == 0)
++        {
++            hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index.");
++        }
++        else
++        {
++            sm4_src_from_node(&instr.srcs[2], sample_index, 0);
++        }
++
++        ++instr.src_count;
++    }
++
++    write_sm4_instruction(buffer, &instr);
++}
++
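++/* Sample instructions take their operands in the order destination,
++ * coordinates, resource, sampler, optionally followed by an LOD/bias source
++ * or a ddx/ddy pair. */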
++static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
++        const struct hlsl_ir_resource_load *load)
++{
++    const struct hlsl_type *resource_type = load->resource.var->data_type;
++    const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
++    const struct hlsl_ir_node *coords = load->coords.node;
++    const struct hlsl_deref *resource = &load->resource;
++    const struct hlsl_deref *sampler = &load->sampler;
++    const struct hlsl_ir_node *dst = &load->node;
++    struct sm4_instruction instr;
++
++    memset(&instr, 0, sizeof(instr));
++    switch (load->load_type)
++    {
++        case HLSL_RESOURCE_SAMPLE:
++            instr.opcode = VKD3D_SM4_OP_SAMPLE;
++            break;
++
++        case HLSL_RESOURCE_SAMPLE_LOD:
++            instr.opcode = VKD3D_SM4_OP_SAMPLE_LOD;
++            break;
++
++        case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
++            instr.opcode = VKD3D_SM4_OP_SAMPLE_B;
++            break;
++
++        case HLSL_RESOURCE_SAMPLE_GRAD:
++            instr.opcode = VKD3D_SM4_OP_SAMPLE_GRAD;
++            break;
++
++        default:
++            vkd3d_unreachable();
++    }
++
++    if (texel_offset)
++    {
++        if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
++        {
++            hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
++                    "Offset must resolve to integer literal in the range -8 to 7.");
++            return;
++        }
++    }
++
++    sm4_dst_from_node(&instr.dsts[0], dst);
++    instr.dst_count = 1;
++
++    sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
++    sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask);
++    sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL);
++    instr.src_count = 3;
++
++    if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD
++            || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS)
++    {
++        sm4_src_from_node(&instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL);
++        ++instr.src_count;
++    }
++    else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD)
++    {
++        sm4_src_from_node(&instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL);
++        sm4_src_from_node(&instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL);
++        instr.src_count += 2;
++    }
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++static bool type_is_float(const struct hlsl_type *type)
++{
++    return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF;
++}
++
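++/* Internal bools are all-ones bitmasks (~0u for true, 0 for false), so a cast
++ * from bool is a bitwise AND with the target representation of "true": the
++ * bit pattern of 1.0f (0x3f800000) for floats, or 1 for integer types. */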
++static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx,
++        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr,
++        const struct hlsl_ir_node *arg, uint32_t mask)
++{
++    struct sm4_instruction instr;
++
++    memset(&instr, 0, sizeof(instr));
++    instr.opcode = VKD3D_SM4_OP_AND;
++
++    sm4_dst_from_node(&instr.dsts[0], &expr->node);
++    instr.dst_count = 1;
++
++    sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask);
++    instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
++    instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST;
++    instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR;
++    instr.srcs[1].reg.immconst_uint[0] = mask;
++    instr.src_count = 2;
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++static void write_sm4_cast(struct hlsl_ctx *ctx,
++        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr)
++{
++    static const union
++    {
++        uint32_t u;
++        float f;
++    } one = { .f = 1.0 };
++    const struct hlsl_ir_node *arg1 = expr->operands[0].node;
++    const struct hlsl_type *dst_type = expr->node.data_type;
++    const struct hlsl_type *src_type = arg1->data_type;
++
++    /* Narrowing casts were already lowered. */
++    assert(src_type->dimx == dst_type->dimx);
++
++    switch (dst_type->base_type)
++    {
++        case HLSL_TYPE_HALF:
++        case HLSL_TYPE_FLOAT:
++            switch (src_type->base_type)
++            {
++                case HLSL_TYPE_HALF:
++                case HLSL_TYPE_FLOAT:
++                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
++                    break;
++
++                case HLSL_TYPE_INT:
++                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0);
++                    break;
++
++                case HLSL_TYPE_UINT:
++                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0);
++                    break;
++
++                case HLSL_TYPE_BOOL:
++                    write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u);
++                    break;
++
++                case HLSL_TYPE_DOUBLE:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float.");
++                    break;
++
++                default:
++                    vkd3d_unreachable();
++            }
++            break;
++
++        case HLSL_TYPE_INT:
++            switch (src_type->base_type)
++            {
++                case HLSL_TYPE_HALF:
++                case HLSL_TYPE_FLOAT:
++                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0);
++                    break;
++
++                case HLSL_TYPE_INT:
++                case HLSL_TYPE_UINT:
++                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
++                    break;
++
++                case HLSL_TYPE_BOOL:
++                    write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1);
++                    break;
++
++                case HLSL_TYPE_DOUBLE:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int.");
++                    break;
++
++                default:
++                    vkd3d_unreachable();
++            }
++            break;
++
++        case HLSL_TYPE_UINT:
++            switch (src_type->base_type)
++            {
++                case HLSL_TYPE_HALF:
++                case HLSL_TYPE_FLOAT:
++                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0);
++                    break;
++
++                case HLSL_TYPE_INT:
++                case HLSL_TYPE_UINT:
++                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
++                    break;
++
++                case HLSL_TYPE_BOOL:
++                    write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1);
++                    break;
++
++                case HLSL_TYPE_DOUBLE:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint.");
++                    break;
++
++                default:
++                    vkd3d_unreachable();
++            }
++            break;
++
++        case HLSL_TYPE_DOUBLE:
++            hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double.");
++            break;
++
++        case HLSL_TYPE_BOOL:
++            /* Casts to bool should have already been lowered. */
++        default:
++            vkd3d_unreachable();
++    }
++}
++
++static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
++        const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value)
++{
++    struct sm4_instruction instr;
++
++    memset(&instr, 0, sizeof(instr));
++    instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;
++
++    sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type);
++    instr.dst_count = 1;
++
++    sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
++    sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL);
++    instr.src_count = 2;
++
++    write_sm4_instruction(buffer, &instr);
++}
++
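++/* Expression lowering is effectively a table over (HLSL op, operand base
++ * type): each combination picks a float, signed or unsigned SM4 opcode, and
++ * any combination not handled yet falls through to hlsl_fixme(). */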
++static void write_sm4_expr(struct hlsl_ctx *ctx,
++        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr)
++{
++    const struct hlsl_ir_node *arg1 = expr->operands[0].node;
++    const struct hlsl_ir_node *arg2 = expr->operands[1].node;
++    const struct hlsl_type *dst_type = expr->node.data_type;
++    struct vkd3d_string_buffer *dst_type_string;
++
++    assert(expr->node.reg.allocated);
++
++    if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type)))
++        return;
++
++    switch (expr->op)
++    {
++        case HLSL_OP1_ABS:
++            switch (dst_type->base_type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS);
++                    break;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer);
++            }
++            break;
++
++        case HLSL_OP1_BIT_NOT:
++            assert(type_is_integer(dst_type));
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_CAST:
++            write_sm4_cast(ctx, buffer, expr);
++            break;
++
++        case HLSL_OP1_COS:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1);
++            break;
++
++        case HLSL_OP1_DSX:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_DSY:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_EXP2:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_FLOOR:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_FRACT:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_LOG2:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_LOGIC_NOT:
++            assert(dst_type->base_type == HLSL_TYPE_BOOL);
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_NEG:
++            switch (dst_type->base_type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE);
++                    break;
++
++                case HLSL_TYPE_INT:
++                case HLSL_TYPE_UINT:
++                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0);
++                    break;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer);
++            }
++            break;
++
++        case HLSL_OP1_REINTERPRET:
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_ROUND:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_RSQ:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_SAT:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV
++                    | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT),
++                    &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_SIN:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1);
++            break;
++
++        case HLSL_OP1_SQRT:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP1_TRUNC:
++            assert(type_is_float(dst_type));
++            write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0);
++            break;
++
++        case HLSL_OP2_ADD:
++            switch (dst_type->base_type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2);
++                    break;
++
++                case HLSL_TYPE_INT:
++                case HLSL_TYPE_UINT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2);
++                    break;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer);
++            }
++            break;
++
++        case HLSL_OP2_BIT_AND:
++            assert(type_is_integer(dst_type));
++            write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
++            break;
++
++        case HLSL_OP2_BIT_OR:
++            assert(type_is_integer(dst_type));
++            write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
++            break;
++
++        case HLSL_OP2_BIT_XOR:
++            assert(type_is_integer(dst_type));
++            write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2);
++            break;
++
++        case HLSL_OP2_DIV:
++            switch (dst_type->base_type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2);
++                    break;
++
++                case HLSL_TYPE_UINT:
++                    write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2);
++                    break;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer);
++            }
++            break;
++
++        case HLSL_OP2_DOT:
++            switch (dst_type->base_type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    switch (arg1->data_type->dimx)
++                    {
++                        case 4:
++                            write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2);
++                            break;
++
++                        case 3:
++                            write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2);
++                            break;
++
++                        case 2:
++                            write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2);
++                            break;
++
++                        case 1:
++                        default:
++                            vkd3d_unreachable();
++                    }
++                    break;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer);
++            }
++            break;
++
++        case HLSL_OP2_EQUAL:
++        {
++            const struct hlsl_type *src_type = arg1->data_type;
++
++            assert(dst_type->base_type == HLSL_TYPE_BOOL);
++
++            switch (src_type->base_type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2);
++                    break;
++
++                case HLSL_TYPE_BOOL:
++                case HLSL_TYPE_INT:
++                case HLSL_TYPE_UINT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2);
++                    break;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.",
++                            debug_hlsl_type(ctx, src_type));
++                    break;
++            }
++            break;
++        }
++
++        case HLSL_OP2_GEQUAL:
++        {
++            const struct hlsl_type *src_type = arg1->data_type;
++
++            assert(dst_type->base_type == HLSL_TYPE_BOOL);
++
++            switch (src_type->base_type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2);
++                    break;
++
++                case HLSL_TYPE_INT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2);
++                    break;
++
++                case HLSL_TYPE_BOOL:
++                case HLSL_TYPE_UINT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2);
++                    break;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.",
++                            debug_hlsl_type(ctx, src_type));
++                    break;
++            }
++            break;
++        }
++
++        case HLSL_OP2_LESS:
++        {
++            const struct hlsl_type *src_type = arg1->data_type;
++
++            assert(dst_type->base_type == HLSL_TYPE_BOOL);
++
++            switch (src_type->base_type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2);
++                    break;
++
++                case HLSL_TYPE_INT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2);
++                    break;
++
++                case HLSL_TYPE_BOOL:
++                case HLSL_TYPE_UINT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2);
++                    break;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.",
++                            debug_hlsl_type(ctx, src_type));
++                    break;
++            }
++            break;
++        }
++
++        case HLSL_OP2_LOGIC_AND:
++            assert(dst_type->base_type == HLSL_TYPE_BOOL);
++            write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
++            break;
++
++        case HLSL_OP2_LOGIC_OR:
++            assert(dst_type->base_type == HLSL_TYPE_BOOL);
++            write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
++            break;
++
++        case HLSL_OP2_LSHIFT:
++            assert(type_is_integer(dst_type));
++            assert(dst_type->base_type != HLSL_TYPE_BOOL);
++            write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2);
++            break;
++
++        case HLSL_OP2_MAX:
++            switch (dst_type->base_type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2);
++                    break;
++
++                case HLSL_TYPE_INT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2);
++                    break;
++
++                case HLSL_TYPE_UINT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2);
++                    break;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer);
++            }
++            break;
++
++        case HLSL_OP2_MIN:
++            switch (dst_type->base_type)
++            {
++                case HLSL_TYPE_FLOAT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2);
++                    break;
++
++                case HLSL_TYPE_INT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2);
++                    break;
++
++                case HLSL_TYPE_UINT:
++                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2);
++                    break;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer);
++            }
++            break;
++
++        case HLSL_OP2_MOD:
++            switch (dst_type->base_type)
++            {
++                case HLSL_TYPE_UINT:
++                    write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2);
++                    break;
++
++                default:
++                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer);
++            }
++            break;
++
expression.", dst_type_string->buffer); ++ } ++ break; ++ ++ case HLSL_OP2_MUL: ++ switch (dst_type->base_type) ++ { ++ case HLSL_TYPE_FLOAT: ++ write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); ++ break; ++ ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ /* Using IMUL instead of UMUL because we're taking the low ++ * bits, and the native compiler generates IMUL. */ ++ write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); ++ break; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); ++ } ++ break; ++ ++ case HLSL_OP2_NEQUAL: ++ { ++ const struct hlsl_type *src_type = arg1->data_type; ++ ++ assert(dst_type->base_type == HLSL_TYPE_BOOL); ++ ++ switch (src_type->base_type) ++ { ++ case HLSL_TYPE_FLOAT: ++ write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); ++ break; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); ++ break; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", ++ debug_hlsl_type(ctx, src_type)); ++ break; ++ } ++ break; ++ } ++ ++ case HLSL_OP2_RSHIFT: ++ assert(type_is_integer(dst_type)); ++ assert(dst_type->base_type != HLSL_TYPE_BOOL); ++ write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, ++ &expr->node, arg1, arg2); ++ break; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); ++ } ++ ++ hlsl_release_string_buffer(ctx, dst_type_string); ++} ++ ++static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, ++ .src_count = 1, ++ }; ++ ++ assert(iff->condition.node->data_type->dimx == 1); ++ ++ sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); ++ write_sm4_instruction(buffer, &instr); ++ ++ write_sm4_block(ctx, buffer, &iff->then_block); ++ ++ if (!list_empty(&iff->else_block.instrs)) ++ { ++ instr.opcode = VKD3D_SM4_OP_ELSE; ++ instr.src_count = 0; ++ write_sm4_instruction(buffer, &instr); ++ ++ write_sm4_block(ctx, buffer, &iff->else_block); ++ } ++ ++ instr.opcode = VKD3D_SM4_OP_ENDIF; ++ instr.src_count = 0; ++ write_sm4_instruction(buffer, &instr); ++} ++ ++static void write_sm4_jump(struct hlsl_ctx *ctx, ++ struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) ++{ ++ struct sm4_instruction instr = {0}; ++ ++ switch (jump->type) ++ { ++ case HLSL_IR_JUMP_BREAK: ++ instr.opcode = VKD3D_SM4_OP_BREAK; ++ break; ++ ++ case HLSL_IR_JUMP_DISCARD: ++ { ++ struct sm4_register *reg = &instr.srcs[0].reg; ++ ++ instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; ++ ++ memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); ++ instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; ++ instr.src_count = 1; ++ reg->type = VKD3D_SM4_RT_IMMCONST; ++ reg->dim = VKD3D_SM4_DIMENSION_SCALAR; ++ reg->immconst_uint[0] = ~0u; ++ ++ break; ++ } ++ ++ case HLSL_IR_JUMP_RETURN: ++ vkd3d_unreachable(); ++ ++ default: ++ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); ++ return; ++ } ++ ++ write_sm4_instruction(buffer, &instr); ++} ++ ++/* Does this variable's data come directly from the API user, rather than being ++ * temporary or from a previous shader stage? 
++        case HLSL_IR_JUMP_DISCARD:
++        {
++            struct sm4_register *reg = &instr.srcs[0].reg;
++
++            instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ;
++
++            memset(&instr.srcs[0], 0, sizeof(*instr.srcs));
++            instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
++            instr.src_count = 1;
++            reg->type = VKD3D_SM4_RT_IMMCONST;
++            reg->dim = VKD3D_SM4_DIMENSION_SCALAR;
++            reg->immconst_uint[0] = ~0u;
++
++            break;
++        }
++
++        case HLSL_IR_JUMP_RETURN:
++            vkd3d_unreachable();
++
++        default:
++            hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
++            return;
++    }
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++/* Does this variable's data come directly from the API user, rather than being
++ * temporary or from a previous shader stage?
++ * I.e. is it a uniform or VS input? */
++static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var)
++{
++    if (var->is_uniform)
++        return true;
++
++    return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX;
++}
++
++static void write_sm4_load(struct hlsl_ctx *ctx,
++        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load)
++{
++    const struct hlsl_type *type = load->node.data_type;
++    struct sm4_instruction instr;
++
++    memset(&instr, 0, sizeof(instr));
++
++    sm4_dst_from_node(&instr.dsts[0], &load->node);
++    instr.dst_count = 1;
++
++    assert(type->class <= HLSL_CLASS_LAST_NUMERIC);
++    if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(ctx, load->src.var))
++    {
++        struct hlsl_constant_value value;
++
++        /* Uniform bools can be specified as anything, but internal bools always
++         * have 0 for false and ~0 for true. Normalize that here. */
++
++        instr.opcode = VKD3D_SM4_OP_MOVC;
++
++        sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask);
++
++        memset(&value, 0xff, sizeof(value));
++        sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask);
++        memset(&value, 0, sizeof(value));
++        sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].writemask);
++        instr.src_count = 3;
++    }
++    else
++    {
++        instr.opcode = VKD3D_SM4_OP_MOV;
++
++        sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask);
++        instr.src_count = 1;
++    }
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++static void write_sm4_loop(struct hlsl_ctx *ctx,
++        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop)
++{
++    struct sm4_instruction instr =
++    {
++        .opcode = VKD3D_SM4_OP_LOOP,
++    };
++
++    write_sm4_instruction(buffer, &instr);
++
++    write_sm4_block(ctx, buffer, &loop->body);
++
++    instr.opcode = VKD3D_SM4_OP_ENDLOOP;
++    write_sm4_instruction(buffer, &instr);
++}
++
++static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
++        const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst,
++        const struct hlsl_deref *resource, const struct hlsl_deref *sampler,
++        const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset)
++{
++    struct sm4_src_register *src;
++    struct sm4_instruction instr;
++
++    memset(&instr, 0, sizeof(instr));
++
++    instr.opcode = VKD3D_SM4_OP_GATHER4;
++
++    sm4_dst_from_node(&instr.dsts[0], dst);
++    instr.dst_count = 1;
++
++    sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL);
++
++    if (texel_offset)
++    {
++        if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
++        {
++            if (ctx->profile->major_version < 5)
++            {
++                hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
++                        "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5.");
++                return;
++            }
++            instr.opcode = VKD3D_SM5_OP_GATHER4_PO;
++            sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL);
++        }
++    }
++
++    sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask);
++
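++    /* The gathered channel is selected by a scalar swizzle on the sampler
++     * operand in gather4's encoding, rather than by a separate opcode. */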
++    src = &instr.srcs[instr.src_count++];
++    sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL);
++    src->reg.dim = VKD3D_SM4_DIMENSION_VEC4;
++    src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR;
++    src->swizzle = swizzle;
++
++    write_sm4_instruction(buffer, &instr);
++}
++
++static void write_sm4_resource_load(struct hlsl_ctx *ctx,
++        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load)
++{
++    const struct hlsl_type *resource_type = load->resource.var->data_type;
++    const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
++    const struct hlsl_ir_node *sample_index = load->sample_index.node;
++    const struct hlsl_ir_node *coords = load->coords.node;
++
++    if (!hlsl_type_is_resource(resource_type))
++    {
++        hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables.");
++        return;
++    }
++
++    if (load->sampler.var)
++    {
++        const struct hlsl_type *sampler_type = load->sampler.var->data_type;
++
++        if (!hlsl_type_is_resource(sampler_type))
++        {
++            hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables.");
++            return;
++        }
++
++        if (!load->sampler.var->is_uniform)
++        {
++            hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable.");
++            return;
++        }
++    }
++
++    if (!load->resource.var->is_uniform)
++    {
++        hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable.");
++        return;
++    }
++
++    switch (load->load_type)
++    {
++        case HLSL_RESOURCE_LOAD:
++            write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource,
++                    coords, sample_index, texel_offset, load->sampling_dim);
++            break;
++
++        case HLSL_RESOURCE_SAMPLE:
++        case HLSL_RESOURCE_SAMPLE_LOD:
++        case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
++        case HLSL_RESOURCE_SAMPLE_GRAD:
++            if (!load->sampler.var)
++            {
++                hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression.");
++                return;
++            }
++            write_sm4_sample(ctx, buffer, load);
++            break;
++
++        case HLSL_RESOURCE_GATHER_RED:
++            write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
++                    &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset);
++            break;
++
++        case HLSL_RESOURCE_GATHER_GREEN:
++            write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
++                    &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset);
++            break;
++
++        case HLSL_RESOURCE_GATHER_BLUE:
++            write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
++                    &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset);
++            break;
++
++        case HLSL_RESOURCE_GATHER_ALPHA:
++            write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
++                    &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset);
++            break;
++    }
++}
++
++static void write_sm4_resource_store(struct hlsl_ctx *ctx,
++        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store)
++{
++    const struct hlsl_type *resource_type = store->resource.var->data_type;
++
++    if (!hlsl_type_is_resource(resource_type))
++    {
++        hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables.");
++        return;
++    }
++
++    if (!store->resource.var->is_uniform)
++    {
++        hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable.");
++        return;
++    }
++
++    if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
++    {
++        hlsl_fixme(ctx, &store->node.loc, "Structured buffer stores are not implemented.");
++        return;
++    }
++
++    write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node);
++}
++
&instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); ++ instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); ++ instr.dst_count = 1; ++ ++ sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); ++ instr.src_count = 1; ++ ++ write_sm4_instruction(buffer, &instr); ++} ++ ++static void write_sm4_swizzle(struct hlsl_ctx *ctx, ++ struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) ++{ ++ struct sm4_instruction instr; ++ unsigned int writemask; ++ ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = VKD3D_SM4_OP_MOV; ++ ++ sm4_dst_from_node(&instr.dsts[0], &swizzle->node); ++ instr.dst_count = 1; ++ ++ sm4_register_from_node(&instr.srcs[0].reg, &writemask, &instr.srcs[0].swizzle_type, swizzle->val.node); ++ instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), ++ swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); ++ instr.src_count = 1; ++ ++ write_sm4_instruction(buffer, &instr); ++} ++ ++static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ const struct hlsl_block *block) ++{ ++ const struct hlsl_ir_node *instr; ++ ++ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (instr->data_type) ++ { ++ if (instr->data_type->class == HLSL_CLASS_MATRIX) ++ { ++ hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); ++ break; ++ } ++ else if (instr->data_type->class == HLSL_CLASS_OBJECT) ++ { ++ hlsl_fixme(ctx, &instr->loc, "Object copy."); ++ break; ++ } ++ ++ assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); ++ } ++ ++ switch (instr->type) ++ { ++ case HLSL_IR_CALL: ++ vkd3d_unreachable(); ++ ++ case HLSL_IR_CONSTANT: ++ write_sm4_constant(ctx, buffer, hlsl_ir_constant(instr)); ++ break; ++ ++ case HLSL_IR_EXPR: ++ write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); ++ break; ++ ++ case HLSL_IR_IF: ++ write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); ++ break; ++ ++ case HLSL_IR_JUMP: ++ write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); ++ break; ++ ++ case HLSL_IR_LOAD: ++ write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); ++ break; ++ ++ case HLSL_IR_RESOURCE_LOAD: ++ write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); ++ break; ++ ++ case HLSL_IR_RESOURCE_STORE: ++ write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); ++ break; ++ ++ case HLSL_IR_LOOP: ++ write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); ++ break; ++ ++ case HLSL_IR_STORE: ++ write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); ++ break; ++ ++ case HLSL_IR_SWIZZLE: ++ write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); ++ break; ++ ++ default: ++ hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); ++ } ++ } ++} ++ ++static void write_sm4_shdr(struct hlsl_ctx *ctx, ++ const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) ++{ ++ const struct hlsl_profile_info *profile = ctx->profile; ++ const struct hlsl_ir_var **extern_resources; ++ struct vkd3d_bytecode_buffer buffer = {0}; ++ unsigned int extern_resources_count, i; ++ const struct hlsl_buffer *cbuffer; ++ const struct hlsl_ir_var *var; ++ size_t token_count_position; ++ ++ static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = ++ { ++ VKD3D_SM4_PS, ++ VKD3D_SM4_VS, ++ VKD3D_SM4_GS, ++ VKD3D_SM5_HS, ++ VKD3D_SM5_DS, ++ VKD3D_SM5_CS, ++ 0, /* EFFECT */ ++ 0, /* TEXTURE */ ++ VKD3D_SM4_LIB, ++ }; ++ ++ 
extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); ++ ++ put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); ++ token_count_position = put_u32(&buffer, 0); ++ ++ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) ++ { ++ if (cbuffer->reg.allocated) ++ write_sm4_dcl_constant_buffer(&buffer, cbuffer); ++ } ++ ++ for (i = 0; i < extern_resources_count; ++i) ++ { ++ enum hlsl_regset regset; ++ ++ var = extern_resources[i]; ++ regset = hlsl_type_get_regset(var->data_type); ++ ++ if (regset == HLSL_REGSET_SAMPLERS) ++ write_sm4_dcl_samplers(&buffer, var); ++ else if (regset == HLSL_REGSET_TEXTURES) ++ write_sm4_dcl_textures(ctx, &buffer, var, false); ++ else if (regset == HLSL_REGSET_UAVS) ++ write_sm4_dcl_textures(ctx, &buffer, var, true); ++ } ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) ++ write_sm4_dcl_semantic(ctx, &buffer, var); ++ } ++ ++ if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) ++ write_sm4_dcl_thread_group(&buffer, ctx->thread_count); ++ ++ if (ctx->temp_count) ++ write_sm4_dcl_temps(&buffer, ctx->temp_count); ++ ++ write_sm4_block(ctx, &buffer, &entry_func->body); ++ ++ write_sm4_ret(&buffer); ++ ++ set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); ++ ++ add_section(dxbc, TAG_SHDR, &buffer); ++ ++ vkd3d_free(extern_resources); ++} ++ ++int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) ++{ ++ struct dxbc_writer dxbc; ++ size_t i; ++ int ret; ++ ++ dxbc_writer_init(&dxbc); ++ ++ write_sm4_signature(ctx, &dxbc, false); ++ write_sm4_signature(ctx, &dxbc, true); ++ write_sm4_rdef(ctx, &dxbc); ++ write_sm4_shdr(ctx, entry_func, &dxbc); ++ ++ if (!(ret = ctx->result)) ++ ret = dxbc_writer_write(&dxbc, out); ++ for (i = 0; i < dxbc.section_count; ++i) ++ vkd3d_shader_free_shader_code(&dxbc.sections[i].data); ++ return ret; ++} +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index c9c15f01155..343fdb2252e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -22,6 +22,8 @@ + #include + #include + ++/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ ++ + static inline int char_to_int(char c) + { + if ('0' <= c && c <= '9') +@@ -338,22 +340,35 @@ void vkd3d_shader_error(struct vkd3d_shader_message_context *context, const stru + va_end(args); + } + ++size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer) ++{ ++ size_t aligned_size = align(buffer->size, 4); ++ ++ if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, aligned_size, 1)) ++ { ++ buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; ++ return aligned_size; ++ } ++ ++ memset(buffer->data + buffer->size, 0xab, aligned_size - buffer->size); ++ buffer->size = aligned_size; ++ return aligned_size; ++} ++ + size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size) + { +- size_t aligned_size = align(size, 4); +- size_t offset = buffer->size; ++ size_t offset = bytecode_align(buffer); + + if (buffer->status) + return offset; + +- if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, offset + aligned_size, 1)) ++ if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, offset + size, 
1)) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return offset; + } + memcpy(buffer->data + offset, bytes, size); +- memset(buffer->data + offset + size, 0xab, aligned_size - size); +- buffer->size = offset + aligned_size; ++ buffer->size = offset + size; + return offset; + } + +@@ -1070,7 +1085,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + + if (TRACE_ON()) + { +- vkd3d_shader_trace(parser); ++ vkd3d_shader_trace(&parser->instructions, &parser->shader_version); + } + + for (i = 0; i < parser->instructions.count; ++i) +@@ -1167,75 +1182,73 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + return ret; + } + +-static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, ++static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, ++ const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { + struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; ++ struct vkd3d_glsl_generator *glsl_generator; + struct vkd3d_shader_compile_info scan_info; +- struct spirv_compiler *spirv_compiler; +- struct vkd3d_shader_parser *parser; + int ret; + ++ vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); ++ + scan_info = *compile_info; + scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; + scan_descriptor_info.next = scan_info.next; + scan_info.next = &scan_descriptor_info; + +- if ((ret = scan_dxbc(&scan_info, message_context)) < 0) ++ if ((ret = scan_with_parser(&scan_info, message_context, parser)) < 0) + return ret; + +- if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) ++ switch (compile_info->target_type) + { +- WARN("Failed to initialise shader parser.\n"); +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); +- return ret; +- } ++ case VKD3D_SHADER_TARGET_D3D_ASM: ++ ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); ++ break; + +- vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); ++ case VKD3D_SHADER_TARGET_GLSL: ++ if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, ++ message_context, &parser->location))) ++ { ++ ERR("Failed to create GLSL generator.\n"); ++ vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); ++ return VKD3D_ERROR; ++ } + +- if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) +- { +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); +- ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); +- vkd3d_shader_parser_destroy(parser); +- return ret; +- } ++ ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); ++ vkd3d_glsl_generator_destroy(glsl_generator); ++ break; + +- if (compile_info->target_type == VKD3D_SHADER_TARGET_GLSL) +- { +- struct vkd3d_glsl_generator *glsl_generator; ++ case VKD3D_SHADER_TARGET_SPIRV_BINARY: ++ case VKD3D_SHADER_TARGET_SPIRV_TEXT: ++ ret = spirv_compile(parser, &scan_descriptor_info, compile_info, out, message_context); ++ break; + +- if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, +- message_context, &parser->location))) +- { +- ERR("Failed to create GLSL generator.\n"); +- vkd3d_shader_parser_destroy(parser); +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); +- return VKD3D_ERROR; +- } ++ 
default: ++ /* Validation should prevent us from reaching this. */ ++ assert(0); ++ } + +- ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); ++ vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); ++ return ret; ++} + +- vkd3d_glsl_generator_destroy(glsl_generator); +- vkd3d_shader_parser_destroy(parser); +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); +- return ret; +- } ++static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) ++{ ++ struct vkd3d_shader_parser *parser; ++ int ret; + +- if (!(spirv_compiler = spirv_compiler_create(&parser->shader_version, &parser->shader_desc, +- compile_info, &scan_descriptor_info, message_context, &parser->location))) ++ if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) + { +- ERR("Failed to create DXBC compiler.\n"); +- vkd3d_shader_parser_destroy(parser); +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); +- return VKD3D_ERROR; ++ WARN("Failed to initialise shader parser.\n"); ++ return ret; + } + +- ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); ++ ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); + +- spirv_compiler_destroy(spirv_compiler); + vkd3d_shader_parser_destroy(parser); +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return ret; + } + +@@ -1270,7 +1283,7 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ + + if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) + { +- ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); ++ ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); + vkd3d_shader_parser_destroy(parser); + return ret; + } +@@ -1388,10 +1401,54 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu + desc->version = 0; + } + ++static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, ++ const struct shader_signature *src) ++{ ++ unsigned int i; ++ ++ signature->element_count = src->element_count; ++ if (!src->elements) ++ { ++ assert(!signature->element_count); ++ signature->elements = NULL; ++ return true; ++ } ++ ++ if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) ++ return false; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ struct vkd3d_shader_signature_element *d = &signature->elements[i]; ++ struct signature_element *e = &src->elements[i]; ++ ++ d->semantic_name = e->semantic_name; ++ d->semantic_index = e->semantic_index; ++ d->stream_index = e->stream_index; ++ d->sysval_semantic = e->sysval_semantic; ++ d->component_type = e->component_type; ++ d->register_index = e->register_index; ++ if (e->register_count > 1) ++ FIXME("Arrayed elements are not supported yet.\n"); ++ d->mask = e->mask; ++ d->used_mask = e->used_mask; ++ d->min_precision = e->min_precision; ++ } ++ ++ return true; ++} ++ ++void shader_signature_cleanup(struct shader_signature *signature) ++{ ++ vkd3d_free(signature->elements); ++ signature->elements = NULL; ++} ++ + int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_signature *signature, char **messages) + { + struct vkd3d_shader_message_context message_context; ++ struct shader_signature shader_signature; + int ret; + + TRACE("dxbc {%p, %zu}, 
signature %p, messages %p.\n", dxbc->code, dxbc->size, signature, messages); +@@ -1400,13 +1457,17 @@ int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, + *messages = NULL; + vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); + +- ret = shader_parse_input_signature(dxbc, &message_context, signature); ++ ret = shader_parse_input_signature(dxbc, &message_context, &shader_signature); + vkd3d_shader_message_context_trace_messages(&message_context); + if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) + ret = VKD3D_ERROR_OUT_OF_MEMORY; + + vkd3d_shader_message_context_cleanup(&message_context); + ++ if (!vkd3d_shader_signature_from_shader_signature(signature, &shader_signature)) ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ shader_signature_cleanup(&shader_signature); + return ret; + } + +@@ -1642,6 +1703,84 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins + return true; + } + ++static struct vkd3d_shader_src_param *shader_instruction_array_clone_src_params( ++ struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_src_param *params, ++ unsigned int count); ++ ++static bool shader_register_clone_relative_addresses(struct vkd3d_shader_register *reg, ++ struct vkd3d_shader_instruction_array *instructions) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < reg->idx_count; ++i) ++ { ++ if (!reg->idx[i].rel_addr) ++ continue; ++ ++ if (!(reg->idx[i].rel_addr = shader_instruction_array_clone_src_params(instructions, reg->idx[i].rel_addr, 1))) ++ return false; ++ } ++ ++ return true; ++} ++ ++static struct vkd3d_shader_dst_param *shader_instruction_array_clone_dst_params( ++ struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_dst_param *params, ++ unsigned int count) ++{ ++ struct vkd3d_shader_dst_param *dst_params; ++ unsigned int i; ++ ++ if (!(dst_params = shader_dst_param_allocator_get(&instructions->dst_params, count))) ++ return NULL; ++ ++ memcpy(dst_params, params, count * sizeof(*params)); ++ for (i = 0; i < count; ++i) ++ { ++ if (!shader_register_clone_relative_addresses(&dst_params[i].reg, instructions)) ++ return NULL; ++ } ++ ++ return dst_params; ++} ++ ++static struct vkd3d_shader_src_param *shader_instruction_array_clone_src_params( ++ struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_src_param *params, ++ unsigned int count) ++{ ++ struct vkd3d_shader_src_param *src_params; ++ unsigned int i; ++ ++ if (!(src_params = shader_src_param_allocator_get(&instructions->src_params, count))) ++ return NULL; ++ ++ memcpy(src_params, params, count * sizeof(*params)); ++ for (i = 0; i < count; ++i) ++ { ++ if (!shader_register_clone_relative_addresses(&src_params[i].reg, instructions)) ++ return NULL; ++ } ++ ++ return src_params; ++} ++ ++/* NOTE: Immediate constant buffers are not cloned, so the source must not be destroyed while the ++ * destination is in use. This seems like a reasonable requirement given how this is currently used. 
*/ ++bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, ++ unsigned int dst, unsigned int src) ++{ ++ struct vkd3d_shader_instruction *ins = &instructions->elements[dst]; ++ ++ *ins = instructions->elements[src]; ++ ++ if (ins->dst_count && ins->dst && !(ins->dst = shader_instruction_array_clone_dst_params(instructions, ++ ins->dst, ins->dst_count))) ++ return false; ++ ++ return !ins->src_count || !!(ins->src = shader_instruction_array_clone_src_params(instructions, ++ ins->src, ins->src_count)); ++} ++ + void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions) + { + unsigned int i; +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 79be999adf9..406d53a3391 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -49,7 +49,7 @@ + #include "vkd3d_common.h" + #include "vkd3d_memory.h" + #include "vkd3d_shader.h" +-#include "wine/list.h" ++#include "list.h" + + #include + #include +@@ -74,6 +74,13 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF = 1000, + VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE = 1001, + VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY = 1002, ++ VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_INDEX_COUNT = 1003, ++ VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS = 1004, ++ VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER = 1005, ++ VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL = 1006, ++ ++ VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, ++ VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301, + + VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND = 2000, + VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE = 2001, +@@ -125,6 +132,7 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT = 5023, + VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, + VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, ++ VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, + + VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, + VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, +@@ -223,6 +231,7 @@ enum vkd3d_shader_opcode + VKD3DSIH_DEQ, + VKD3DSIH_DFMA, + VKD3DSIH_DGE, ++ VKD3DSIH_DISCARD, + VKD3DSIH_DIV, + VKD3DSIH_DLT, + VKD3DSIH_DMAX, +@@ -675,6 +684,7 @@ struct vkd3d_shader_register + bool non_uniform; + enum vkd3d_data_type data_type; + struct vkd3d_shader_register_index idx[3]; ++ unsigned int idx_count; + enum vkd3d_immconst_type immconst_type; + union + { +@@ -774,13 +784,36 @@ enum vkd3d_shader_input_sysval_semantic + VKD3D_SIV_LINE_DENSITY_TESS_FACTOR = 22, + }; + ++struct signature_element ++{ ++ unsigned int sort_index; ++ const char *semantic_name; ++ unsigned int semantic_index; ++ unsigned int stream_index; ++ enum vkd3d_shader_sysval_semantic sysval_semantic; ++ enum vkd3d_shader_component_type component_type; ++ unsigned int register_index; ++ unsigned int register_count; ++ unsigned int mask; ++ unsigned int used_mask; ++ enum vkd3d_shader_minimum_precision min_precision; ++}; ++ ++struct shader_signature ++{ ++ struct signature_element *elements; ++ unsigned int element_count; ++}; ++ ++void shader_signature_cleanup(struct shader_signature *signature); ++ + struct vkd3d_shader_desc + { + const uint32_t *byte_code; + size_t byte_code_size; +- struct vkd3d_shader_signature input_signature; +- struct vkd3d_shader_signature output_signature; +- struct vkd3d_shader_signature patch_constant_signature; ++ struct shader_signature 
input_signature; ++ struct shader_signature output_signature; ++ struct shader_signature patch_constant_signature; + }; + + struct vkd3d_shader_register_semantic +@@ -927,6 +960,11 @@ static inline bool vkd3d_shader_register_is_output(const struct vkd3d_shader_reg + return reg->type == VKD3DSPR_OUTPUT || reg->type == VKD3DSPR_COLOROUT; + } + ++static inline bool vkd3d_shader_register_is_patch_constant(const struct vkd3d_shader_register *reg) ++{ ++ return reg->type == VKD3DSPR_PATCHCONST; ++} ++ + struct vkd3d_shader_location + { + const char *source_name; +@@ -981,6 +1019,8 @@ bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instru + bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *instructions, unsigned int reserve); + bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *instructions, + struct vkd3d_shader_immediate_constant_buffer *icb); ++bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, ++ unsigned int dst, unsigned int src); + void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); + + struct vkd3d_shader_parser +@@ -991,10 +1031,8 @@ struct vkd3d_shader_parser + + struct vkd3d_shader_desc shader_desc; + struct vkd3d_shader_version shader_version; +- const uint32_t *ptr; + const struct vkd3d_shader_parser_ops *ops; + struct vkd3d_shader_instruction_array instructions; +- size_t instruction_idx; + }; + + struct vkd3d_shader_parser_ops +@@ -1028,7 +1066,8 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse + parser->ops->parser_destroy(parser); + } + +-void vkd3d_shader_trace(struct vkd3d_shader_parser *parser); ++void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, ++ const struct vkd3d_shader_version *shader_version); + + const char *shader_get_type_prefix(enum vkd3d_shader_type type); + +@@ -1044,8 +1083,9 @@ struct vkd3d_string_buffer_cache + size_t count, max_count, capacity; + }; + +-enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, +- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out); ++enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vkd3d_shader_instruction_array *instructions, ++ const struct vkd3d_shader_version *shader_version, const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_code *out); + void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); + struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); + void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); +@@ -1067,6 +1107,8 @@ struct vkd3d_bytecode_buffer + int status; + }; + ++/* Align to the next 4-byte offset, and return that offset. 
*/ ++size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer); + size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size); + void set_u32(struct vkd3d_bytecode_buffer *buffer, size_t offset, uint32_t value); + +@@ -1128,8 +1170,10 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + + void free_shader_desc(struct vkd3d_shader_desc *desc); + ++int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, ++ struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc); + int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, +- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature); ++ struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); + + struct vkd3d_glsl_generator; + +@@ -1141,16 +1185,10 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); + + #define SPIRV_MAX_SRC_COUNT 6 + +-struct spirv_compiler; +- +-struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, +- const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, ++int spirv_compile(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, +- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); +-int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, +- struct vkd3d_shader_code *spirv); +-void spirv_compiler_destroy(struct spirv_compiler *compiler); ++ const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + + void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]); + +@@ -1202,6 +1240,14 @@ static inline enum vkd3d_data_type vkd3d_data_type_from_component_type( + } + } + ++enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, ++ unsigned int index); ++ ++static inline enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_shader_sysval_semantic sysval) ++{ ++ return vkd3d_siv_from_sysval_indexed(sysval, 0); ++} ++ + static inline unsigned int vkd3d_write_mask_get_component_idx(DWORD write_mask) + { + unsigned int i; +@@ -1323,4 +1369,11 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void + void dxbc_writer_init(struct dxbc_writer *dxbc); + int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); + ++enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *instructions); ++enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( ++ struct vkd3d_shader_instruction_array *instructions, const struct shader_signature *input_signature); ++enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, ++ enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, ++ struct shader_signature *output_signature, struct shader_signature *patch_constant_signature); ++ + #endif /* __VKD3D_SHADER_PRIVATE_H */ +diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c +index 6eddcfa2d14..32439eec7eb 100644 +--- 
a/libs/vkd3d/libs/vkd3d/command.c ++++ b/libs/vkd3d/libs/vkd3d/command.c +@@ -1437,7 +1437,7 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( + pool_desc.pNext = NULL; + pool_desc.flags = 0; + pool_desc.maxSets = 512; +- pool_desc.poolSizeCount = ARRAY_SIZE(device->vk_pool_sizes); ++ pool_desc.poolSizeCount = device->vk_pool_count; + pool_desc.pPoolSizes = device->vk_pool_sizes; + if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0) + { +@@ -2463,6 +2463,8 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, + memset(list->so_counter_buffers, 0, sizeof(list->so_counter_buffers)); + memset(list->so_counter_buffer_offsets, 0, sizeof(list->so_counter_buffer_offsets)); + ++ list->descriptor_heap_count = 0; ++ + ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); + } + +@@ -2720,28 +2722,31 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des + unsigned int index, bool use_array) + { + uint32_t descriptor_range_magic = range->descriptor_magic; +- const struct vkd3d_view *view = descriptor->s.u.view_info.view; ++ union d3d12_desc_object u = descriptor->s.u; + uint32_t vk_binding = range->binding; ++ VkDescriptorType vk_descriptor_type; + uint32_t set = range->set; + +- if (descriptor->s.magic != descriptor_range_magic) ++ if (!u.header || u.header->magic != descriptor_range_magic) + return false; + ++ vk_descriptor_type = u.header->vk_descriptor_type; ++ + vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_write->pNext = NULL; + vk_descriptor_write->dstSet = vk_descriptor_sets[set]; + vk_descriptor_write->dstBinding = use_array ? vk_binding : vk_binding + index; + vk_descriptor_write->dstArrayElement = use_array ? index : 0; + vk_descriptor_write->descriptorCount = 1; +- vk_descriptor_write->descriptorType = descriptor->s.vk_descriptor_type; ++ vk_descriptor_write->descriptorType = vk_descriptor_type; + vk_descriptor_write->pImageInfo = NULL; + vk_descriptor_write->pBufferInfo = NULL; + vk_descriptor_write->pTexelBufferView = NULL; + +- switch (descriptor->s.magic) ++ switch (u.header->magic) + { + case VKD3D_DESCRIPTOR_MAGIC_CBV: +- vk_descriptor_write->pBufferInfo = &descriptor->s.u.vk_cbv_info; ++ vk_descriptor_write->pBufferInfo = &u.cb_desc->vk_cbv_info; + break; + + case VKD3D_DESCRIPTOR_MAGIC_SRV: +@@ -2752,8 +2757,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des + * in pairs in one set. 
*/ + if (range->descriptor_count == UINT_MAX) + { +- if (descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER +- && descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) ++ if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ++ && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + { + vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; + vk_descriptor_write->dstBinding = 0; +@@ -2763,21 +2768,21 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des + { + if (!use_array) + vk_descriptor_write->dstBinding = vk_binding + 2 * index; +- if (descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER +- && descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) ++ if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ++ && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + ++vk_descriptor_write->dstBinding; + } + +- if (descriptor->s.vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER +- || descriptor->s.vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) ++ if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ++ || vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + { +- vk_descriptor_write->pTexelBufferView = &view->u.vk_buffer_view; ++ vk_descriptor_write->pTexelBufferView = &u.view->v.u.vk_buffer_view; + } + else + { + vk_image_info->sampler = VK_NULL_HANDLE; +- vk_image_info->imageView = view->u.vk_image_view; +- vk_image_info->imageLayout = descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_SRV ++ vk_image_info->imageView = u.view->v.u.vk_image_view; ++ vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV + ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; + + vk_descriptor_write->pImageInfo = vk_image_info; +@@ -2785,7 +2790,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des + break; + + case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: +- vk_image_info->sampler = view->u.vk_sampler; ++ vk_image_info->sampler = u.view->v.u.vk_sampler; + vk_image_info->imageView = VK_NULL_HANDLE; + vk_image_info->imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + +@@ -2793,7 +2798,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des + break; + + default: +- ERR("Invalid descriptor %#x.\n", descriptor->s.magic); ++ ERR("Invalid descriptor %#x.\n", u.header->magic); + return false; + } + +@@ -2847,6 +2852,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list + for (j = 0; j < descriptor_count; ++j, ++descriptor) + { + unsigned int register_idx = range->base_register_idx + j; ++ union d3d12_desc_object u = descriptor->s.u; ++ VkBufferView vk_counter_view; ++ ++ vk_counter_view = (u.header && u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV) ++ ? u.view->v.vk_counter_view : VK_NULL_HANDLE; + + /* Track UAV counters. */ + if (range->descriptor_magic == VKD3D_DESCRIPTOR_MAGIC_UAV) +@@ -2856,8 +2866,6 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list + if (state->uav_counters.bindings[k].register_space == range->register_space + && state->uav_counters.bindings[k].register_index == register_idx) + { +- VkBufferView vk_counter_view = descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV +- ? 
descriptor->s.u.view_info.view->vk_counter_view : VK_NULL_HANDLE; + if (bindings->vk_uav_counter_views[k] != vk_counter_view) + bindings->uav_counters_dirty = true; + bindings->vk_uav_counter_views[k] = vk_counter_view; +@@ -2867,7 +2875,7 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list + } + + /* Not all descriptors are necessarily populated if the range is unbounded. */ +- if (descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_FREE) ++ if (!u.header) + continue; + + if (!vk_write_descriptor_set_from_d3d12_desc(current_descriptor_write, current_image_info, +@@ -3153,6 +3161,30 @@ static void d3d12_command_list_update_descriptor_tables(struct d3d12_command_lis + } + } + ++static bool contains_heap(struct d3d12_descriptor_heap **heap_array, unsigned int count, ++ const struct d3d12_descriptor_heap *query) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < count; ++i) ++ if (heap_array[i] == query) ++ return true; ++ return false; ++} ++ ++static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) ++{ ++ struct d3d12_device *device = list->device; ++ unsigned int i; ++ ++ for (i = 0; i < list->descriptor_heap_count; ++i) ++ { ++ vkd3d_mutex_lock(&list->descriptor_heaps[i]->vk_sets_mutex); ++ d3d12_desc_flush_vk_heap_updates_locked(list->descriptor_heaps[i], device); ++ vkd3d_mutex_unlock(&list->descriptor_heaps[i]->vk_sets_mutex); ++ } ++} ++ + static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) + { +@@ -3177,10 +3209,18 @@ static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *l + bindings->sampler_heap_id = heap->serial_id; + } + +- /* These sets can be shared across multiple command lists, and therefore binding must +- * be synchronised. On an experimental branch in which caching of Vk descriptor writes +- * greatly increased the chance of multiple threads arriving here at the same time, +- * GRID 2019 crashed without the mutex lock. */ ++ if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) ++ { ++ if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) ++ { ++ /* Descriptors can be written after binding. 
*/ ++ FIXME("Flushing descriptor updates while list %p is not closed.\n", list); ++ command_list_flush_vk_heap_updates(list); ++ list->descriptor_heap_count = 0; ++ } ++ list->descriptor_heaps[list->descriptor_heap_count++] = heap; ++ } ++ + vkd3d_mutex_lock(&heap->vk_sets_mutex); + + for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) +@@ -3963,10 +4003,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo + vk_viewports[i].minDepth = viewports[i].MinDepth; + vk_viewports[i].maxDepth = viewports[i].MaxDepth; + +- if (!vk_viewports[i].width || !vk_viewports[i].height) ++ if (vk_viewports[i].width <= 0.0f) + { +- FIXME_ONCE("Invalid viewport %u, ignoring RSSetViewports().\n", i); +- return; ++ /* Vulkan does not support width <= 0 */ ++ FIXME_ONCE("Setting invalid viewport %u to zero height.\n", i); ++ vk_viewports[i].width = 1.0f; ++ vk_viewports[i].height = 0.0f; + } + } + +@@ -4481,11 +4523,20 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, + root_parameter = root_signature_get_root_descriptor(root_signature, index); + assert(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); + +- resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address); +- buffer_info.buffer = resource->u.vk_buffer; +- buffer_info.offset = gpu_address - resource->gpu_address; +- buffer_info.range = resource->desc.Width - buffer_info.offset; +- buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange); ++ if (gpu_address) ++ { ++ resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address); ++ buffer_info.buffer = resource->u.vk_buffer; ++ buffer_info.offset = gpu_address - resource->gpu_address; ++ buffer_info.range = resource->desc.Width - buffer_info.offset; ++ buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange); ++ } ++ else ++ { ++ buffer_info.buffer = list->device->null_resources.vk_buffer; ++ buffer_info.offset = 0; ++ buffer_info.range = VK_WHOLE_SIZE; ++ } + + if (vk_info->KHR_push_descriptor) + { +@@ -4547,13 +4598,13 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li + assert(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); + + /* FIXME: Re-use buffer views. 
*/ +- if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, &vk_buffer_view)) ++ if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, root_parameter->parameter_type, &vk_buffer_view)) + { + ERR("Failed to create buffer view.\n"); + return; + } + +- if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) ++ if (vk_buffer_view && !(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) + { + ERR("Failed to add buffer view.\n"); + VK_CALL(vkDestroyBufferView(vk_device, vk_buffer_view, NULL)); +@@ -4644,6 +4695,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics + WARN("Ignoring NULL index buffer view.\n"); + return; + } ++ if (!view->BufferLocation) ++ { ++ WARN("Ignoring index buffer location 0.\n"); ++ return; ++ } + + vk_procs = &list->device->vk_procs; + +@@ -4844,7 +4900,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi + WARN("Failed to add view.\n"); + } + +- list->rtvs[i] = view->u.vk_image_view; ++ list->rtvs[i] = view->v.u.vk_image_view; + list->fb_width = max(list->fb_width, rtv_desc->width); + list->fb_height = max(list->fb_height, rtv_desc->height); + list->fb_layer_count = max(list->fb_layer_count, rtv_desc->layer_count); +@@ -4868,7 +4924,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi + list->dsv = VK_NULL_HANDLE; + } + +- list->dsv = view->u.vk_image_view; ++ list->dsv = view->v.u.vk_image_view; + list->fb_width = max(list->fb_width, dsv_desc->width); + list->fb_height = max(list->fb_height, dsv_desc->height); + list->fb_layer_count = max(list->fb_layer_count, dsv_desc->layer_count); +@@ -4960,7 +5016,7 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, + fb_desc.flags = 0; + fb_desc.renderPass = vk_render_pass; + fb_desc.attachmentCount = 1; +- fb_desc.pAttachments = &view->u.vk_image_view; ++ fb_desc.pAttachments = &view->v.u.vk_image_view; + fb_desc.width = width; + fb_desc.height = height; + fb_desc.layers = layer_count; +@@ -5163,13 +5219,14 @@ static void vkd3d_uav_clear_state_get_image_pipeline(const struct vkd3d_uav_clea + } + + static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, +- struct d3d12_resource *resource, struct vkd3d_view *view, const VkClearColorValue *clear_colour, ++ struct d3d12_resource *resource, struct vkd3d_view *descriptor, const VkClearColorValue *clear_colour, + unsigned int rect_count, const D3D12_RECT *rects) + { + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + unsigned int i, miplevel_idx, layer_count; + struct vkd3d_uav_clear_pipeline pipeline; + struct vkd3d_uav_clear_args clear_args; ++ const struct vkd3d_resource_view *view; + VkDescriptorImageInfo image_info; + D3D12_RECT full_rect, curr_rect; + VkWriteDescriptorSet write_set; +@@ -5181,8 +5238,9 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, + d3d12_command_list_invalidate_bindings(list, list->state); + d3d12_command_list_invalidate_root_parameters(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); + +- if (!d3d12_command_allocator_add_view(list->allocator, view)) ++ if (!d3d12_command_allocator_add_view(list->allocator, descriptor)) + WARN("Failed to add view.\n"); ++ view = &descriptor->v; + + clear_args.colour = *clear_colour; + +@@ -5295,10 +5353,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID + const UINT values[4], UINT rect_count, const D3D12_RECT *rects) + { + struct d3d12_command_list 
*list = impl_from_ID3D12GraphicsCommandList2(iface); ++ struct vkd3d_view *descriptor, *uint_view = NULL; + struct d3d12_device *device = list->device; +- struct vkd3d_view *view, *uint_view = NULL; + struct vkd3d_texture_view_desc view_desc; + const struct vkd3d_format *uint_format; ++ const struct vkd3d_resource_view *view; + struct d3d12_resource *resource_impl; + VkClearColorValue colour; + +@@ -5306,7 +5365,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID + iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); + + resource_impl = unsafe_impl_from_ID3D12Resource(resource); +- view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view_info.view; ++ if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) ++ return; ++ view = &descriptor->v; + memcpy(colour.uint32, values, sizeof(colour.uint32)); + + if (view->format->type != VKD3D_FORMAT_TYPE_UINT) +@@ -5320,8 +5381,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID + + if (d3d12_resource_is_buffer(resource_impl)) + { +- if (!vkd3d_create_buffer_view(device, resource_impl->u.vk_buffer, uint_format, +- view->info.buffer.offset, view->info.buffer.size, &uint_view)) ++ if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, ++ uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) + { + ERR("Failed to create buffer view.\n"); + return; +@@ -5337,16 +5398,17 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID + view_desc.layer_idx = view->info.texture.layer_idx; + view_desc.layer_count = view->info.texture.layer_count; + +- if (!vkd3d_create_texture_view(device, resource_impl->u.vk_image, &view_desc, &uint_view)) ++ if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, ++ &uint_view)) + { + ERR("Failed to create image view.\n"); + return; + } + } +- view = uint_view; ++ descriptor = uint_view; + } + +- d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); ++ d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); + + if (uint_view) + vkd3d_view_decref(uint_view, device); +@@ -5365,7 +5427,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I + iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); + + resource_impl = unsafe_impl_from_ID3D12Resource(resource); +- view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view_info.view; ++ if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) ++ return; + memcpy(colour.float32, values, sizeof(colour.float32)); + + d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); +@@ -5906,6 +5969,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d + + list->update_descriptors = device->use_vk_heaps ? 
d3d12_command_list_update_heap_descriptors + : d3d12_command_list_update_descriptors; ++ list->descriptor_heap_count = 0; + + if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) + { +@@ -6199,6 +6263,8 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm + return; + } + ++ command_list_flush_vk_heap_updates(cmd_list); ++ + buffers[i] = cmd_list->vk_command_buffer; + } + +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index 39a5ca013c7..4263dcf4184 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -19,6 +19,8 @@ + #include "vkd3d_private.h" + #include "vkd3d_version.h" + ++#define VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE 256u ++ + struct vkd3d_struct + { + enum vkd3d_structure_type type; +@@ -2393,9 +2395,23 @@ static void vkd3d_time_domains_init(struct d3d12_device *device) + WARN("Found no acceptable host time domain. Calibrated timestamps will not be available.\n"); + } + +-static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, +- const struct vkd3d_device_descriptor_limits *limits) ++static void device_init_descriptor_pool_sizes(struct d3d12_device *device) + { ++ const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; ++ VkDescriptorPoolSize *pool_sizes = device->vk_pool_sizes; ++ ++ if (device->use_vk_heaps) ++ { ++ pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; ++ pool_sizes[0].descriptorCount = min(limits->storage_image_max_descriptors, ++ VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); ++ pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; ++ pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; ++ device->vk_pool_count = 2; ++ return; ++ } ++ ++ assert(ARRAY_SIZE(device->vk_pool_sizes) >= 6); + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); +@@ -2412,8 +2428,27 @@ static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, + pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER; + pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); ++ device->vk_pool_count = 6; + }; + ++static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) ++{ ++ cache->head = NULL; ++ cache->size = size; ++} ++ ++static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache) ++{ ++ union d3d12_desc_object u; ++ void *next; ++ ++ for (u.object = cache->head; u.object; u.object = next) ++ { ++ next = u.header->next; ++ vkd3d_free(u.object); ++ } ++} ++ + /* ID3D12Device */ + static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) + { +@@ -2454,7 +2489,6 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) + { + struct d3d12_device *device = impl_from_ID3D12Device(iface); + ULONG refcount = InterlockedDecrement(&device->refcount); +- size_t i; + + TRACE("%p decreasing refcount to %u.\n", device, refcount); + +@@ -2474,8 +2508,8 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) + vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device); + d3d12_device_destroy_pipeline_cache(device); + d3d12_device_destroy_vkd3d_queues(device); +- for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) +- vkd3d_mutex_destroy(&device->desc_mutex[i]); ++ 
vkd3d_desc_object_cache_cleanup(&device->view_desc_cache); ++ vkd3d_desc_object_cache_cleanup(&device->cbuffer_desc_cache); + VK_CALL(vkDestroyDevice(device->vk_device, NULL)); + if (device->parent) + IUnknown_Release(device->parent); +@@ -3368,132 +3402,6 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + } + +-static void flush_desc_writes(struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], +- struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) +-{ +- enum vkd3d_vk_descriptor_set_index set; +- for (set = 0; set < VKD3D_SET_INDEX_COUNT; ++set) +- { +- if (!infos[set].count) +- continue; +- d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); +- infos[set].count = 0; +- infos[set].uav_counter = false; +- } +-} +- +-static void d3d12_desc_buffered_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, +- struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], +- struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) +-{ +- struct d3d12_desc_copy_location *location; +- enum vkd3d_vk_descriptor_set_index set; +- struct vkd3d_mutex *mutex; +- +- mutex = d3d12_device_get_descriptor_mutex(device, src); +- vkd3d_mutex_lock(mutex); +- +- if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_FREE) +- { +- /* Source must be unlocked first, and therefore can't be used as a null source. */ +- static const struct d3d12_desc null = {0}; +- vkd3d_mutex_unlock(mutex); +- d3d12_desc_write_atomic(dst, &null, device); +- return; +- } +- +- set = vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(src->s.vk_descriptor_type); +- location = &locations[set][infos[set].count++]; +- +- location->src.s = src->s; +- +- if (location->src.s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) +- vkd3d_view_incref(location->src.s.u.view_info.view); +- +- vkd3d_mutex_unlock(mutex); +- +- infos[set].uav_counter |= (location->src.s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV) +- && !!location->src.s.u.view_info.view->vk_counter_view; +- location->dst = dst; +- +- if (infos[set].count == ARRAY_SIZE(locations[0])) +- { +- d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); +- infos[set].count = 0; +- infos[set].uav_counter = false; +- } +-} +- +-/* Some games, e.g. Control, copy a large number of descriptors per frame, so the +- * speed of this function is critical. */ +-static void d3d12_device_vk_heaps_copy_descriptors(struct d3d12_device *device, +- UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, +- const UINT *dst_descriptor_range_sizes, +- UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, +- const UINT *src_descriptor_range_sizes) +-{ +- struct d3d12_desc_copy_location locations[VKD3D_SET_INDEX_COUNT][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; +- unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; +- /* The locations array is relatively large, and often mostly empty. Keeping these +- * values together in a separate array will likely result in fewer cache misses. 
*/ +- struct d3d12_desc_copy_info infos[VKD3D_SET_INDEX_COUNT]; +- struct d3d12_descriptor_heap *descriptor_heap = NULL; +- const struct d3d12_desc *src, *heap_base, *heap_end; +- unsigned int dst_range_size, src_range_size; +- struct d3d12_desc *dst; +- +- descriptor_heap = d3d12_desc_get_descriptor_heap(d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[0])); +- heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; +- heap_end = heap_base + descriptor_heap->desc.NumDescriptors; +- +- memset(infos, 0, sizeof(infos)); +- dst_range_idx = dst_idx = 0; +- src_range_idx = src_idx = 0; +- while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) +- { +- dst_range_size = dst_descriptor_range_sizes ? dst_descriptor_range_sizes[dst_range_idx] : 1; +- src_range_size = src_descriptor_range_sizes ? src_descriptor_range_sizes[src_range_idx] : 1; +- +- dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); +- src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); +- +- if (dst < heap_base || dst >= heap_end) +- { +- flush_desc_writes(locations, infos, descriptor_heap, device); +- descriptor_heap = d3d12_desc_get_descriptor_heap(dst); +- heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; +- heap_end = heap_base + descriptor_heap->desc.NumDescriptors; +- } +- +- for (; dst_idx < dst_range_size && src_idx < src_range_size; src_idx++, dst_idx++) +- { +- /* We don't need to lock either descriptor for the identity check. The descriptor +- * mutex is only intended to prevent use-after-free of the vkd3d_view caused by a +- * race condition in the calling app. It is unnecessary to protect this test as it's +- * the app's race condition, not ours. */ +- if (dst[dst_idx].s.magic == src[src_idx].s.magic && (dst[dst_idx].s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) +- && dst[dst_idx].s.u.view_info.written_serial_id == src[src_idx].s.u.view_info.view->serial_id) +- continue; +- d3d12_desc_buffered_copy_atomic(&dst[dst_idx], &src[src_idx], locations, infos, descriptor_heap, device); +- } +- +- if (dst_idx >= dst_range_size) +- { +- ++dst_range_idx; +- dst_idx = 0; +- } +- if (src_idx >= src_range_size) +- { +- ++src_range_idx; +- src_idx = 0; +- } +- } +- +- flush_desc_writes(locations, infos, descriptor_heap, device); +-} +- +-#define VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT 8 +- + static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, + UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, + const UINT *dst_descriptor_range_sizes, +@@ -3525,15 +3433,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, + if (!dst_descriptor_range_count) + return; + +- if (device->use_vk_heaps && (dst_descriptor_range_count > 1 || (dst_descriptor_range_sizes +- && dst_descriptor_range_sizes[0] >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT))) +- { +- d3d12_device_vk_heaps_copy_descriptors(device, dst_descriptor_range_count, dst_descriptor_range_offsets, +- dst_descriptor_range_sizes, src_descriptor_range_count, src_descriptor_range_offsets, +- src_descriptor_range_sizes); +- return; +- } +- + dst_range_idx = dst_idx = 0; + src_range_idx = src_idx = 0; + while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) +@@ -3544,8 +3443,12 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, + dst = 
d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); + src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); + +- while (dst_idx < dst_range_size && src_idx < src_range_size) +- d3d12_desc_copy(&dst[dst_idx++], &src[src_idx++], device); ++ for (; dst_idx < dst_range_size && src_idx < src_range_size; ++dst_idx, ++src_idx) ++ { ++ if (dst[dst_idx].s.u.object == src[src_idx].s.u.object) ++ continue; ++ d3d12_desc_copy(&dst[dst_idx], &src[src_idx], device); ++ } + + if (dst_idx >= dst_range_size) + { +@@ -3570,17 +3473,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i + iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, + descriptor_heap_type); + +- if (descriptor_count >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT) +- { +- struct d3d12_device *device = impl_from_ID3D12Device(iface); +- if (device->use_vk_heaps) +- { +- d3d12_device_vk_heaps_copy_descriptors(device, 1, &dst_descriptor_range_offset, +- &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count); +- return; +- } +- } +- + d3d12_device_CopyDescriptors(iface, 1, &dst_descriptor_range_offset, &descriptor_count, + 1, &src_descriptor_range_offset, &descriptor_count, descriptor_heap_type); + } +@@ -4080,7 +3972,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + { + const struct vkd3d_vk_device_procs *vk_procs; + HRESULT hr; +- size_t i; + + device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; + device->refcount = 1; +@@ -4123,10 +4014,10 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, + device->blocked_queue_count = 0; + vkd3d_mutex_init(&device->blocked_queues_mutex); + +- for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) +- vkd3d_mutex_init(&device->desc_mutex[i]); ++ vkd3d_desc_object_cache_init(&device->view_desc_cache, sizeof(struct vkd3d_view)); ++ vkd3d_desc_object_cache_init(&device->cbuffer_desc_cache, sizeof(struct vkd3d_cbuffer_desc)); + +- vkd3d_init_descriptor_pool_sizes(device->vk_pool_sizes, &device->vk_info.descriptor_limits); ++ device_init_descriptor_pool_sizes(device); + + if ((device->parent = create_info->parent)) + IUnknown_AddRef(device->parent); +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index 8c050cfeb32..ea7b6859cc1 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -326,6 +326,9 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap) + + vkd3d_private_store_destroy(&heap->private_store); + ++ if (heap->map_ptr) ++ VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory)); ++ + VK_CALL(vkFreeMemory(device->vk_device, heap->vk_memory, NULL)); + + vkd3d_mutex_destroy(&heap->mutex); +@@ -346,12 +349,19 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_Release(ID3D12Heap *iface) + + TRACE("%p decreasing refcount to %u.\n", heap, refcount); + +- if (!refcount) ++ /* A heap must not be destroyed until all contained resources are destroyed. 
*/ ++ if (!refcount && !heap->resource_count) + d3d12_heap_destroy(heap); + + return refcount; + } + ++static void d3d12_heap_resource_destroyed(struct d3d12_heap *heap) ++{ ++ if (!InterlockedDecrement(&heap->resource_count) && (!heap->refcount || heap->is_private)) ++ d3d12_heap_destroy(heap); ++} ++ + static HRESULT STDMETHODCALLTYPE d3d12_heap_GetPrivateData(ID3D12Heap *iface, + REFGUID guid, UINT *data_size, void *data) + { +@@ -437,97 +447,6 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) + return impl_from_ID3D12Heap(iface); + } + +-static HRESULT d3d12_heap_map(struct d3d12_heap *heap, uint64_t offset, +- struct d3d12_resource *resource, void **data) +-{ +- struct d3d12_device *device = heap->device; +- HRESULT hr = S_OK; +- VkResult vr; +- +- vkd3d_mutex_lock(&heap->mutex); +- +- assert(!resource->map_count || heap->map_ptr); +- +- if (!resource->map_count) +- { +- if (!heap->map_ptr) +- { +- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; +- +- TRACE("Mapping heap %p.\n", heap); +- +- assert(!heap->map_count); +- +- if ((vr = VK_CALL(vkMapMemory(device->vk_device, heap->vk_memory, +- 0, VK_WHOLE_SIZE, 0, &heap->map_ptr))) < 0) +- { +- WARN("Failed to map device memory, vr %d.\n", vr); +- heap->map_ptr = NULL; +- } +- +- hr = hresult_from_vk_result(vr); +- } +- +- if (heap->map_ptr) +- ++heap->map_count; +- } +- +- if (hr == S_OK) +- { +- assert(heap->map_ptr); +- if (data) +- *data = (BYTE *)heap->map_ptr + offset; +- ++resource->map_count; +- } +- else +- { +- assert(!heap->map_ptr); +- if (data) +- *data = NULL; +- } +- +- vkd3d_mutex_unlock(&heap->mutex); +- +- return hr; +-} +- +-static void d3d12_heap_unmap(struct d3d12_heap *heap, struct d3d12_resource *resource) +-{ +- struct d3d12_device *device = heap->device; +- +- vkd3d_mutex_lock(&heap->mutex); +- +- if (!resource->map_count) +- { +- WARN("Resource %p is not mapped.\n", resource); +- goto done; +- } +- +- --resource->map_count; +- if (resource->map_count) +- goto done; +- +- if (!heap->map_count) +- { +- ERR("Heap %p is not mapped.\n", heap); +- goto done; +- } +- +- --heap->map_count; +- if (!heap->map_count) +- { +- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; +- +- TRACE("Unmapping heap %p, ptr %p.\n", heap, heap->map_ptr); +- +- VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory)); +- heap->map_ptr = NULL; +- } +- +-done: +- vkd3d_mutex_unlock(&heap->mutex); +-} +- + static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) + { + if (!resource && !desc->SizeInBytes) +@@ -552,15 +471,23 @@ static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d1 + return S_OK; + } + ++static VkMemoryPropertyFlags d3d12_heap_get_memory_property_flags(const struct d3d12_heap *heap) ++{ ++ return heap->device->memory_properties.memoryTypes[heap->vk_memory_type].propertyFlags; ++} ++ + static HRESULT d3d12_heap_init(struct d3d12_heap *heap, + struct d3d12_device *device, const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) + { ++ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkMemoryRequirements memory_requirements; + VkDeviceSize vk_memory_size; ++ VkResult vr; + HRESULT hr; + + heap->ID3D12Heap_iface.lpVtbl = &d3d12_heap_vtbl; + heap->refcount = 1; ++ heap->resource_count = 0; + + heap->is_private = !!resource; + +@@ -628,6 +555,20 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, + heap->device = device; + if (!heap->is_private) + 
d3d12_device_add_ref(heap->device);
++    else
++        heap->resource_count = 1;
++
++    if (d3d12_heap_get_memory_property_flags(heap) & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
++    {
++        if ((vr = VK_CALL(vkMapMemory(device->vk_device,
++                heap->vk_memory, 0, VK_WHOLE_SIZE, 0, &heap->map_ptr))) < 0)
++        {
++            heap->map_ptr = NULL;
++            ERR("Failed to map memory, vr %d.\n", vr);
++            d3d12_heap_destroy(heap);
++            return hresult_from_vk_result(vr);
++        }
++    }
+
+     return S_OK;
+ }
+@@ -1027,8 +968,8 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12
+     else
+         VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL));
+
+-    if (resource->flags & VKD3D_RESOURCE_DEDICATED_HEAP)
+-        d3d12_heap_destroy(resource->heap);
++    if (resource->heap)
++        d3d12_heap_resource_destroyed(resource->heap);
+ }
+
+ static ULONG d3d12_resource_incref(struct d3d12_resource *resource)
+@@ -1223,12 +1164,55 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_GetDevice(ID3D12Resource *iface,
+     return d3d12_device_query_interface(resource->device, iid, device);
+ }
+
++static void *d3d12_resource_get_map_ptr(struct d3d12_resource *resource)
++{
++    assert(resource->heap->map_ptr);
++    return (uint8_t *)resource->heap->map_ptr + resource->heap_offset;
++}
++
++static void d3d12_resource_get_vk_range(struct d3d12_resource *resource,
++        uint64_t offset, uint64_t size, VkMappedMemoryRange *vk_range)
++{
++    vk_range->sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
++    vk_range->pNext = NULL;
++    vk_range->memory = resource->heap->vk_memory;
++    vk_range->offset = resource->heap_offset + offset;
++    vk_range->size = size;
++}
++
++static void d3d12_resource_invalidate(struct d3d12_resource *resource, uint64_t offset, uint64_t size)
++{
++    const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs;
++    VkMappedMemoryRange vk_range;
++    VkResult vr;
++
++    if (d3d12_heap_get_memory_property_flags(resource->heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
++        return;
++
++    d3d12_resource_get_vk_range(resource, offset, size, &vk_range);
++    if ((vr = VK_CALL(vkInvalidateMappedMemoryRanges(resource->device->vk_device, 1, &vk_range))) < 0)
++        ERR("Failed to invalidate memory, vr %d.\n", vr);
++}
++
++static void d3d12_resource_flush(struct d3d12_resource *resource, uint64_t offset, uint64_t size)
++{
++    const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs;
++    VkMappedMemoryRange vk_range;
++    VkResult vr;
++
++    if (d3d12_heap_get_memory_property_flags(resource->heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
++        return;
++
++    d3d12_resource_get_vk_range(resource, offset, size, &vk_range);
++    if ((vr = VK_CALL(vkFlushMappedMemoryRanges(resource->device->vk_device, 1, &vk_range))) < 0)
++        ERR("Failed to flush memory, vr %d.\n", vr);
++}
++
+ static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT sub_resource,
+         const D3D12_RANGE *read_range, void **data)
+ {
+     struct d3d12_resource *resource = impl_from_ID3D12Resource(iface);
+     unsigned int sub_resource_count;
+-    HRESULT hr;
+
+     TRACE("iface %p, sub_resource %u, read_range %p, data %p.\n",
+             iface, sub_resource, read_range, data);
+@@ -1259,15 +1243,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT
+         return E_NOTIMPL;
+     }
+
+-    WARN("Ignoring read range %p.\n", read_range);
+-
+-    if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, data)))
+-        WARN("Failed to map resource %p, hr %#x.\n", resource, hr);
+-
+     if (data)
++    {
++        *data = 
d3d12_resource_get_map_ptr(resource); + TRACE("Returning pointer %p.\n", *data); ++ } + +- return hr; ++ if (!read_range) ++ d3d12_resource_invalidate(resource, 0, resource->desc.Width); ++ else if (read_range->End > read_range->Begin) ++ d3d12_resource_invalidate(resource, read_range->Begin, read_range->End - read_range->Begin); ++ ++ return S_OK; + } + + static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT sub_resource, +@@ -1286,9 +1273,10 @@ static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT s + return; + } + +- WARN("Ignoring written range %p.\n", written_range); +- +- d3d12_heap_unmap(resource->heap, resource); ++ if (!written_range) ++ d3d12_resource_flush(resource, 0, resource->desc.Width); ++ else if (written_range->End > written_range->Begin) ++ d3d12_resource_flush(resource, written_range->Begin, written_range->End - written_range->Begin); + } + + static D3D12_RESOURCE_DESC * STDMETHODCALLTYPE d3d12_resource_GetDesc(ID3D12Resource *iface, +@@ -1320,10 +1308,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc + VkImageSubresource vk_sub_resource; + const struct vkd3d_format *format; + VkSubresourceLayout vk_layout; ++ uint64_t dst_offset, dst_size; + struct d3d12_device *device; + uint8_t *dst_data; + D3D12_BOX box; +- HRESULT hr; + + TRACE("iface %p, src_data %p, src_row_pitch %u, src_slice_pitch %u, " + "dst_sub_resource %u, dst_box %s.\n", +@@ -1381,20 +1369,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc + TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", + vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch); + +- if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, (void **)&dst_data))) +- { +- WARN("Failed to map resource %p, hr %#x.\n", resource, hr); +- return hr; +- } +- +- dst_data += vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, ++ dst_data = d3d12_resource_get_map_ptr(resource); ++ dst_offset = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + vk_layout.depthPitch, dst_box->left, dst_box->top, dst_box->front); ++ dst_size = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, ++ vk_layout.depthPitch, dst_box->right, dst_box->bottom - 1, dst_box->back - 1) - dst_offset; + + vkd3d_format_copy_data(format, src_data, src_row_pitch, src_slice_pitch, +- dst_data, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->right - dst_box->left, ++ dst_data + dst_offset, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->right - dst_box->left, + dst_box->bottom - dst_box->top, dst_box->back - dst_box->front); + +- d3d12_heap_unmap(resource->heap, resource); ++ d3d12_resource_flush(resource, dst_offset, dst_size); + + return S_OK; + } +@@ -1408,10 +1393,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour + VkImageSubresource vk_sub_resource; + const struct vkd3d_format *format; + VkSubresourceLayout vk_layout; ++ uint64_t src_offset, src_size; + struct d3d12_device *device; + uint8_t *src_data; + D3D12_BOX box; +- HRESULT hr; + + TRACE("iface %p, dst_data %p, dst_row_pitch %u, dst_slice_pitch %u, " + "src_sub_resource %u, src_box %s.\n", +@@ -1469,21 +1454,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour + TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", + 
vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch); + +- if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, (void **)&src_data))) +- { +- WARN("Failed to map resource %p, hr %#x.\n", resource, hr); +- return hr; +- } +- +- src_data += vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, ++ src_data = d3d12_resource_get_map_ptr(resource); ++ src_offset = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + vk_layout.depthPitch, src_box->left, src_box->top, src_box->front); ++ src_size = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, ++ vk_layout.depthPitch, src_box->right, src_box->bottom - 1, src_box->back - 1) - src_offset; + +- vkd3d_format_copy_data(format, src_data, vk_layout.rowPitch, vk_layout.depthPitch, ++ d3d12_resource_invalidate(resource, src_offset, src_size); ++ ++ vkd3d_format_copy_data(format, src_data + src_offset, vk_layout.rowPitch, vk_layout.depthPitch, + dst_data, dst_row_pitch, dst_slice_pitch, src_box->right - src_box->left, + src_box->bottom - src_box->top, src_box->back - src_box->front); + +- d3d12_heap_unmap(resource->heap, resource); +- + return S_OK; + } + +@@ -1941,6 +1923,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, + { + resource->heap = heap; + resource->heap_offset = heap_offset; ++ InterlockedIncrement(&heap->resource_count); + } + else + { +@@ -2061,24 +2044,72 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource) + return d3d12_resource_decref(impl_from_ID3D12Resource(resource)); + } + +-/* CBVs, SRVs, UAVs */ +-static struct vkd3d_view *vkd3d_view_create(enum vkd3d_view_type type) ++/* Objects are cached so that vkd3d_view_incref() can safely check the refcount ++ * of an object freed by another thread. 
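The cache is a lock-free LIFO free list updated with compare-and-swap; because freed objects are recycled rather than returned to the system allocator, a stale pointer read during the race still dereferences valid memory.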
*/ ++static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) + { +- struct vkd3d_view *view; ++ union d3d12_desc_object u; ++ void *next; + +- if ((view = vkd3d_malloc(sizeof(*view)))) ++ do + { +- view->refcount = 1; +- view->type = type; +- view->serial_id = InterlockedIncrement64(&object_global_serial_id); +- view->vk_counter_view = VK_NULL_HANDLE; ++ u.object = cache->head; ++ if (!u.object) ++ return vkd3d_malloc(cache->size); ++ next = u.header->next; + } +- return view; ++ while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, u.object, next)); ++ ++ return u.object; ++} ++ ++static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object) ++{ ++ union d3d12_desc_object u = {object}; ++ void *head; ++ ++ do ++ { ++ head = cache->head; ++ u.header->next = head; ++ } ++ while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, head, u.object)); ++} ++ ++static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device) ++{ ++ struct vkd3d_cbuffer_desc *desc; ++ ++ if (!(desc = vkd3d_desc_object_cache_get(&device->cbuffer_desc_cache))) ++ return NULL; ++ ++ desc->h.magic = VKD3D_DESCRIPTOR_MAGIC_CBV; ++ desc->h.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; ++ desc->h.refcount = 1; ++ ++ return desc; + } + +-void vkd3d_view_incref(struct vkd3d_view *view) ++static struct vkd3d_view *vkd3d_view_create(uint32_t magic, VkDescriptorType vk_descriptor_type, ++ enum vkd3d_view_type type, struct d3d12_device *device) + { +- InterlockedIncrement(&view->refcount); ++ struct vkd3d_view *view; ++ ++ assert(magic); ++ ++ if (!(view = vkd3d_desc_object_cache_get(&device->view_desc_cache))) ++ { ++ ERR("Failed to allocate descriptor object.\n"); ++ return NULL; ++ } ++ ++ view->h.magic = magic; ++ view->h.vk_descriptor_type = vk_descriptor_type; ++ view->h.refcount = 1; ++ view->v.type = type; ++ view->v.vk_counter_view = VK_NULL_HANDLE; ++ ++ return view; + } + + static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *device) +@@ -2087,313 +2118,299 @@ static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *dev + + TRACE("Destroying view %p.\n", view); + +- switch (view->type) ++ switch (view->v.type) + { + case VKD3D_VIEW_TYPE_BUFFER: +- VK_CALL(vkDestroyBufferView(device->vk_device, view->u.vk_buffer_view, NULL)); ++ VK_CALL(vkDestroyBufferView(device->vk_device, view->v.u.vk_buffer_view, NULL)); + break; + case VKD3D_VIEW_TYPE_IMAGE: +- VK_CALL(vkDestroyImageView(device->vk_device, view->u.vk_image_view, NULL)); ++ VK_CALL(vkDestroyImageView(device->vk_device, view->v.u.vk_image_view, NULL)); + break; + case VKD3D_VIEW_TYPE_SAMPLER: +- VK_CALL(vkDestroySampler(device->vk_device, view->u.vk_sampler, NULL)); ++ VK_CALL(vkDestroySampler(device->vk_device, view->v.u.vk_sampler, NULL)); + break; + default: +- WARN("Unhandled view type %d.\n", view->type); ++ WARN("Unhandled view type %d.\n", view->v.type); + } + +- if (view->vk_counter_view) +- VK_CALL(vkDestroyBufferView(device->vk_device, view->vk_counter_view, NULL)); ++ if (view->v.vk_counter_view) ++ VK_CALL(vkDestroyBufferView(device->vk_device, view->v.vk_counter_view, NULL)); + +- vkd3d_free(view); ++ vkd3d_desc_object_cache_push(&device->view_desc_cache, view); + } + +-void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) ++void vkd3d_view_decref(void *view, struct d3d12_device *device) + { +- if (!InterlockedDecrement(&view->refcount)) +- vkd3d_view_destroy(view, device); ++ union 
d3d12_desc_object u = {view}; ++ ++ if (vkd3d_atomic_decrement(&u.header->refcount)) ++ return; ++ ++ if (u.header->magic != VKD3D_DESCRIPTOR_MAGIC_CBV) ++ vkd3d_view_destroy(u.view, device); ++ else ++ vkd3d_desc_object_cache_push(&device->cbuffer_desc_cache, u.object); + } + +-/* TODO: write null descriptors to all applicable sets (invalid behaviour workaround). */ +-static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descriptor_heap_vk_set *descriptor_set, +- struct d3d12_desc_copy_location *locations, unsigned int write_count) ++static inline void d3d12_desc_replace(struct d3d12_desc *dst, void *view, struct d3d12_device *device) + { +- unsigned int i, info_index = 0, write_index = 0; ++ if ((view = vkd3d_atomic_exchange_pointer(&dst->s.u.object, view))) ++ vkd3d_view_decref(view, device); ++} + +- switch (locations[0].src.s.vk_descriptor_type) +- { +- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: +- for (; write_index < write_count; ++write_index) +- { +- descriptor_set->vk_descriptor_writes[write_index].pBufferInfo = &descriptor_set->vk_buffer_infos[info_index]; +- for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) +- descriptor_set->vk_buffer_infos[info_index] = locations[info_index].src.s.u.vk_cbv_info; +- } +- break; +- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: +- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: +- for (; write_index < write_count; ++write_index) +- { +- descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; +- for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) +- descriptor_set->vk_image_infos[info_index].imageView = locations[info_index].src.s.u.view_info.view->u.vk_image_view; +- } +- break; +- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: +- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: +- for (; write_index < write_count; ++write_index) +- { +- descriptor_set->vk_descriptor_writes[write_index].pTexelBufferView = &descriptor_set->vk_buffer_views[info_index]; +- for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) +- descriptor_set->vk_buffer_views[info_index] = locations[info_index].src.s.u.view_info.view->u.vk_buffer_view; +- } +- break; +- case VK_DESCRIPTOR_TYPE_SAMPLER: +- for (; write_index < write_count; ++write_index) +- { +- descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; +- for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) +- descriptor_set->vk_image_infos[info_index].sampler = locations[info_index].src.s.u.view_info.view->u.vk_sampler; +- } +- break; +- default: +- ERR("Unhandled descriptor type %#x.\n", locations[0].src.s.vk_descriptor_type); +- break; +- } ++#define VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE 24 ++ ++struct descriptor_writes ++{ ++ VkDescriptorBufferInfo null_vk_cbv_info; ++ VkBufferView null_vk_buffer_view; ++ VkDescriptorImageInfo vk_image_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; ++ VkWriteDescriptorSet vk_descriptor_writes[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; ++ void *held_refs[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; ++ unsigned int count; ++ unsigned int held_ref_count; ++}; ++ ++static void descriptor_writes_free_object_refs(struct descriptor_writes *writes, struct d3d12_device *device) ++{ ++ unsigned int i; ++ for (i = 0; i < writes->held_ref_count; ++i) ++ vkd3d_view_decref(writes->held_refs[i], device); ++ 
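/* All batched references have been released; reset the count for the next flush. */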
writes->held_ref_count = 0; + } + + static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_heap *descriptor_heap, +- uint32_t dst_array_element, const struct d3d12_device *device) ++ uint32_t dst_array_element, struct descriptor_writes *writes, struct d3d12_device *device) + { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct d3d12_descriptor_heap_vk_set *descriptor_set; +- VkBufferView vk_buffer_view = VK_NULL_HANDLE; +- enum vkd3d_vk_descriptor_set_index i; +- VkDescriptorBufferInfo vk_cbv_info; +- +- vk_cbv_info.buffer = VK_NULL_HANDLE; +- vk_cbv_info.offset = 0; +- vk_cbv_info.range = VK_WHOLE_SIZE; ++ enum vkd3d_vk_descriptor_set_index set; ++ unsigned int i = writes->count; + + /* Binding a shader with the wrong null descriptor type works in Windows. + * To support that here we must write one to all applicable Vulkan sets. */ +- for (i = VKD3D_SET_INDEX_UNIFORM_BUFFER; i <= VKD3D_SET_INDEX_STORAGE_IMAGE; ++i) +- { +- descriptor_set = &descriptor_heap->vk_descriptor_sets[i]; +- descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst_array_element; +- descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; +- switch (i) ++ for (set = VKD3D_SET_INDEX_UNIFORM_BUFFER; set <= VKD3D_SET_INDEX_STORAGE_IMAGE; ++set) ++ { ++ descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; ++ writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; ++ writes->vk_descriptor_writes[i].pNext = NULL; ++ writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; ++ writes->vk_descriptor_writes[i].dstBinding = 0; ++ writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; ++ writes->vk_descriptor_writes[i].descriptorCount = 1; ++ writes->vk_descriptor_writes[i].descriptorType = descriptor_set->vk_type; ++ switch (set) + { + case VKD3D_SET_INDEX_UNIFORM_BUFFER: +- descriptor_set->vk_descriptor_writes[0].pBufferInfo = &vk_cbv_info; ++ writes->vk_descriptor_writes[i].pImageInfo = NULL; ++ writes->vk_descriptor_writes[i].pBufferInfo = &writes->null_vk_cbv_info; ++ writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + break; + case VKD3D_SET_INDEX_SAMPLED_IMAGE: + case VKD3D_SET_INDEX_STORAGE_IMAGE: +- descriptor_set->vk_image_infos[0].imageView = VK_NULL_HANDLE; ++ writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; ++ writes->vk_descriptor_writes[i].pBufferInfo = NULL; ++ writes->vk_descriptor_writes[i].pTexelBufferView = NULL; ++ writes->vk_image_infos[i].sampler = VK_NULL_HANDLE; ++ writes->vk_image_infos[i].imageView = VK_NULL_HANDLE; ++ writes->vk_image_infos[i].imageLayout = (set == VKD3D_SET_INDEX_STORAGE_IMAGE) ++ ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + break; + case VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER: + case VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER: +- descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &vk_buffer_view; ++ writes->vk_descriptor_writes[i].pImageInfo = NULL; ++ writes->vk_descriptor_writes[i].pBufferInfo = NULL; ++ writes->vk_descriptor_writes[i].pTexelBufferView = &writes->null_vk_buffer_view; + break; + default: + assert(false); + break; + } +- VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); ++ if (++i < ARRAY_SIZE(writes->vk_descriptor_writes) - 1) ++ continue; ++ VK_CALL(vkUpdateDescriptorSets(device->vk_device, i, writes->vk_descriptor_writes, 0, NULL)); ++ descriptor_writes_free_object_refs(writes, device); ++ i = 0; + } ++ ++ writes->count = i; + } + +-/* dst and src contain the same data unless another thread overwrites dst. The array index is +- * calculated from dst, and src is thread safe. */ +-static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, +- struct d3d12_device *device) ++static void d3d12_desc_write_vk_heap(struct d3d12_descriptor_heap *descriptor_heap, unsigned int dst_array_element, ++ struct descriptor_writes *writes, void *object, struct d3d12_device *device) + { + struct d3d12_descriptor_heap_vk_set *descriptor_set; +- struct d3d12_descriptor_heap *descriptor_heap; + const struct vkd3d_vk_device_procs *vk_procs; ++ union d3d12_desc_object u = {object}; ++ unsigned int i = writes->count; ++ VkDescriptorType type; + bool is_null = false; + +- descriptor_heap = d3d12_desc_get_descriptor_heap(dst); +- descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( +- src->s.vk_descriptor_type)]; ++ type = u.header->vk_descriptor_type; ++ descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(type)]; + vk_procs = &device->vk_procs; + +- vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); +- +- descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; +- descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; +- switch (src->s.vk_descriptor_type) ++ writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; ++ writes->vk_descriptor_writes[i].pNext = NULL; ++ writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; ++ writes->vk_descriptor_writes[i].dstBinding = 0; ++ writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; ++ writes->vk_descriptor_writes[i].descriptorCount = 1; ++ writes->vk_descriptor_writes[i].descriptorType = type; ++ switch (type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: +- descriptor_set->vk_descriptor_writes[0].pBufferInfo = &src->s.u.vk_cbv_info; +- is_null = !src->s.u.vk_cbv_info.buffer; ++ writes->vk_descriptor_writes[i].pImageInfo = NULL; ++ writes->vk_descriptor_writes[i].pBufferInfo = &u.cb_desc->vk_cbv_info; ++ writes->vk_descriptor_writes[i].pTexelBufferView = NULL; ++ is_null = !u.cb_desc->vk_cbv_info.buffer; + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: +- is_null = !(descriptor_set->vk_image_infos[0].imageView = src->s.u.view_info.view->u.vk_image_view); ++ writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; ++ writes->vk_descriptor_writes[i].pBufferInfo = NULL; ++ writes->vk_descriptor_writes[i].pTexelBufferView = NULL; ++ writes->vk_image_infos[i].sampler = VK_NULL_HANDLE; ++ 
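/* A null image view here selects the EXT_robustness2 null-descriptor path below. */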
is_null = !(writes->vk_image_infos[i].imageView = u.view->v.u.vk_image_view); ++ writes->vk_image_infos[i].imageLayout = (type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) ++ ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: +- descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->u.vk_buffer_view; +- is_null = !src->s.u.view_info.view->u.vk_buffer_view; ++ writes->vk_descriptor_writes[i].pImageInfo = NULL; ++ writes->vk_descriptor_writes[i].pBufferInfo = NULL; ++ writes->vk_descriptor_writes[i].pTexelBufferView = &u.view->v.u.vk_buffer_view; ++ is_null = !u.view->v.u.vk_buffer_view; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: +- descriptor_set->vk_image_infos[0].sampler = src->s.u.view_info.view->u.vk_sampler; ++ writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; ++ writes->vk_descriptor_writes[i].pBufferInfo = NULL; ++ writes->vk_descriptor_writes[i].pTexelBufferView = NULL; ++ writes->vk_image_infos[i].sampler = u.view->v.u.vk_sampler; ++ writes->vk_image_infos[i].imageView = VK_NULL_HANDLE; ++ writes->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + break; + default: +- ERR("Unhandled descriptor type %#x.\n", src->s.vk_descriptor_type); ++ ERR("Unhandled descriptor type %#x.\n", type); + break; + } + if (is_null && device->vk_info.EXT_robustness2) ++ return d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, dst_array_element, writes, device); ++ ++ ++i; ++ if (u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && u.view->v.vk_counter_view) + { +- d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, +- descriptor_set->vk_descriptor_writes[0].dstArrayElement, device); +- vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); +- return; ++ descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; ++ writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; ++ writes->vk_descriptor_writes[i].pNext = NULL; ++ writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; ++ writes->vk_descriptor_writes[i].dstBinding = 0; ++ writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; ++ writes->vk_descriptor_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; ++ writes->vk_descriptor_writes[i].descriptorCount = 1; ++ writes->vk_descriptor_writes[i].pImageInfo = NULL; ++ writes->vk_descriptor_writes[i].pBufferInfo = NULL; ++ writes->vk_descriptor_writes[i++].pTexelBufferView = &u.view->v.vk_counter_view; + } + +- VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); +- +- if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->s.u.view_info.view->vk_counter_view) ++ if (i >= ARRAY_SIZE(writes->vk_descriptor_writes) - 1) + { +- descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; +- descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; +- descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; +- descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->vk_counter_view; +- VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); ++ VK_CALL(vkUpdateDescriptorSets(device->vk_device, i, writes->vk_descriptor_writes, 0, NULL)); ++ descriptor_writes_free_object_refs(writes, device); ++ i = 0; + } + +- vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); ++ writes->count = 
i; + } + +-static void d3d12_desc_write_atomic_d3d12_only(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) ++void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) + { +- struct vkd3d_view *defunct_view; +- struct vkd3d_mutex *mutex; +- +- mutex = d3d12_device_get_descriptor_mutex(device, dst); +- vkd3d_mutex_lock(mutex); ++ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; ++ struct d3d12_desc *descriptors, *src; ++ struct descriptor_writes writes; ++ union d3d12_desc_object u; ++ unsigned int i, next; + +- if (!(dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || InterlockedDecrement(&dst->s.u.view_info.view->refcount)) +- { +- d3d12_desc_copy_raw(dst, src); +- vkd3d_mutex_unlock(mutex); ++ if ((i = vkd3d_atomic_exchange(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) + return; +- } + +- defunct_view = dst->s.u.view_info.view; +- d3d12_desc_copy_raw(dst, src); +- vkd3d_mutex_unlock(mutex); ++ writes.null_vk_cbv_info.buffer = VK_NULL_HANDLE; ++ writes.null_vk_cbv_info.offset = 0; ++ writes.null_vk_cbv_info.range = VK_WHOLE_SIZE; ++ writes.null_vk_buffer_view = VK_NULL_HANDLE; ++ writes.count = 0; ++ writes.held_ref_count = 0; + +- /* Destroy the view after unlocking to reduce wait time. */ +- vkd3d_view_destroy(defunct_view, device); +-} +- +-void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, +- struct d3d12_device *device) +-{ +- struct vkd3d_view *defunct_view = NULL; +- struct vkd3d_mutex *mutex; ++ descriptors = (struct d3d12_desc *)descriptor_heap->descriptors; + +- mutex = d3d12_device_get_descriptor_mutex(device, dst); +- vkd3d_mutex_lock(mutex); ++ for (; i != UINT_MAX; i = next) ++ { ++ src = &descriptors[i]; ++ next = (int)src->next >> 1; + +- /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ +- if ((dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) +- && !InterlockedDecrement(&dst->s.u.view_info.view->refcount)) +- defunct_view = dst->s.u.view_info.view; ++ u.object = d3d12_desc_get_object_ref(src, device); + +- d3d12_desc_copy_raw(dst, src); ++ if (!u.object) ++ { ++ vkd3d_atomic_exchange(&src->next, 0); ++ continue; ++ } + +- vkd3d_mutex_unlock(mutex); ++ writes.held_refs[writes.held_ref_count++] = u.object; ++ d3d12_desc_write_vk_heap(descriptor_heap, i, &writes, u.object, device); + +- /* Destroy the view after unlocking to reduce wait time. */ +- if (defunct_view) +- vkd3d_view_destroy(defunct_view, device); ++ vkd3d_atomic_exchange(&src->next, 0); ++ } + +- if (device->use_vk_heaps && dst->s.magic) +- d3d12_desc_write_vk_heap(dst, src, device); ++ /* Avoid thunk calls wherever possible. */ ++ if (writes.count) ++ VK_CALL(vkUpdateDescriptorSets(device->vk_device, writes.count, writes.vk_descriptor_writes, 0, NULL)); ++ descriptor_writes_free_object_refs(&writes, device); + } + +-static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) ++static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) + { +- static const struct d3d12_desc null_desc = {0}; ++ struct d3d12_descriptor_heap *descriptor_heap; ++ unsigned int i, head; ++ ++ i = dst->index; ++ descriptor_heap = d3d12_desc_get_descriptor_heap(dst); ++ head = descriptor_heap->dirty_list_head; + +- d3d12_desc_write_atomic(descriptor, &null_desc, device); ++ /* Only one thread can swap the value away from zero. 
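The stored encoding is (head << 1) | 1: the low bit marks the descriptor as pending, and the upper bits link it to the next dirty descriptor, with UINT_MAX (which survives the shift) terminating the list.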
*/ ++ if (!vkd3d_atomic_compare_exchange(&dst->next, 0, (head << 1) | 1)) ++ return; ++ /* Now it is safe to modify 'next' to another nonzero value if necessary. */ ++ while (!vkd3d_atomic_compare_exchange(&descriptor_heap->dirty_list_head, head, i)) ++ { ++ head = descriptor_heap->dirty_list_head; ++ vkd3d_atomic_exchange(&dst->next, (head << 1) | 1); ++ } + } + +-void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, +- struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, ++void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) + { +- struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; +- const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; +- unsigned int i, write_count; +- +- vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); +- +- for (i = 0, write_count = 0; i < info->count; ++i) +- { +- d3d12_desc_write_atomic_d3d12_only(locations[i].dst, &locations[i].src, device); ++ void *object = src->s.u.object; + +- if (i && locations[i].dst == locations[i - 1].dst + 1) +- { +- ++descriptor_set->vk_descriptor_writes[write_count - 1].descriptorCount; +- continue; +- } +- /* Accessing dst->index will be slow if a cache miss occurs, so calculate instead. */ +- descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst +- - (const struct d3d12_desc *)descriptor_heap->descriptors; +- descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; +- } +- d3d12_descriptor_heap_write_vk_descriptor_range(descriptor_set, locations, write_count); +- /* We could pass a VkCopyDescriptorSet array instead, but that would require also storing a src array index +- * for each location, which means querying the src descriptor heap. Contiguous copies require contiguous src +- * descriptors as well as dst, which is less likely to occur. And client race conditions may break it. */ +- VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); +- +- if (!info->uav_counter) +- goto done; +- +- descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; +- +- for (i = 0, write_count = 0; i < info->count; ++i) +- { +- if (!locations[i].src.s.u.view_info.view->vk_counter_view) +- continue; +- descriptor_set->vk_buffer_views[write_count] = locations[i].src.s.u.view_info.view->vk_counter_view; +- descriptor_set->vk_descriptor_writes[write_count].pTexelBufferView = &descriptor_set->vk_buffer_views[write_count]; +- /* Accessing dst->index will be slow if a cache miss occurs, so calculate instead. 
*/ +- descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst +- - (const struct d3d12_desc *)descriptor_heap->descriptors; +- descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; +- } +- VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); ++ d3d12_desc_replace(dst, object, device); ++ if (device->use_vk_heaps && object && !dst->next) ++ d3d12_desc_mark_as_modified(dst); ++} + +-done: +- vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); ++static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) ++{ ++ d3d12_desc_replace(descriptor, NULL, device); + } + + void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) + { + struct d3d12_desc tmp; +- struct vkd3d_mutex *mutex; + + assert(dst != src); + +- /* Shadow of the Tomb Raider and possibly other titles sometimes destroy +- * and rewrite a descriptor in another thread while it is being copied. */ +- mutex = d3d12_device_get_descriptor_mutex(device, src); +- vkd3d_mutex_lock(mutex); +- +- if (src->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) +- vkd3d_view_incref(src->s.u.view_info.view); +- +- d3d12_desc_copy_raw(&tmp, src); +- +- vkd3d_mutex_unlock(mutex); +- ++ tmp.s.u.object = d3d12_desc_get_object_ref(src, device); + d3d12_desc_write_atomic(dst, &tmp, device); + } + +@@ -2455,8 +2472,9 @@ static bool vkd3d_create_vk_buffer_view(struct d3d12_device *device, + return vr == VK_SUCCESS; + } + +-bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, +- VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view) ++bool vkd3d_create_buffer_view(struct d3d12_device *device, uint32_t magic, VkBuffer vk_buffer, ++ const struct vkd3d_format *format, VkDeviceSize offset, VkDeviceSize size, ++ struct vkd3d_view **view) + { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkBufferView vk_view = VK_NULL_HANDLE; +@@ -2465,16 +2483,18 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c + if (vk_buffer && !vkd3d_create_vk_buffer_view(device, vk_buffer, format, offset, size, &vk_view)) + return false; + +- if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_BUFFER))) ++ if (!(object = vkd3d_view_create(magic, magic == VKD3D_DESCRIPTOR_MAGIC_UAV ++ ? 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, ++ VKD3D_VIEW_TYPE_BUFFER, device))) + { + VK_CALL(vkDestroyBufferView(device->vk_device, vk_view, NULL)); + return false; + } + +- object->u.vk_buffer_view = vk_view; +- object->format = format; +- object->info.buffer.offset = offset; +- object->info.buffer.size = size; ++ object->v.u.vk_buffer_view = vk_view; ++ object->v.format = format; ++ object->v.info.buffer.offset = offset; ++ object->v.info.buffer.size = size; + *view = object; + return true; + } +@@ -2482,7 +2502,7 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c + #define VKD3D_VIEW_RAW_BUFFER 0x1 + + static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, +- struct d3d12_resource *resource, DXGI_FORMAT view_format, ++ uint32_t magic, struct d3d12_resource *resource, DXGI_FORMAT view_format, + unsigned int offset, unsigned int size, unsigned int structure_stride, + unsigned int flags, struct vkd3d_view **view) + { +@@ -2513,7 +2533,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, + + assert(d3d12_resource_is_buffer(resource)); + +- return vkd3d_create_buffer_view(device, resource->u.vk_buffer, ++ return vkd3d_create_buffer_view(device, magic, resource->u.vk_buffer, + format, offset * element_size, size * element_size, view); + } + +@@ -2741,7 +2761,7 @@ static void vkd3d_texture_view_desc_normalise(struct vkd3d_texture_view_desc *de + desc->layer_count = max_layer_count; + } + +-bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, ++bool vkd3d_create_texture_view(struct d3d12_device *device, uint32_t magic, VkImage vk_image, + const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view) + { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; +@@ -2774,18 +2794,19 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, + } + } + +- if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_IMAGE))) ++ if (!(object = vkd3d_view_create(magic, magic == VKD3D_DESCRIPTOR_MAGIC_UAV ? 
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ++ : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VKD3D_VIEW_TYPE_IMAGE, device))) + { + VK_CALL(vkDestroyImageView(device->vk_device, vk_view, NULL)); + return false; + } + +- object->u.vk_image_view = vk_view; +- object->format = format; +- object->info.texture.vk_view_type = desc->view_type; +- object->info.texture.miplevel_idx = desc->miplevel_idx; +- object->info.texture.layer_idx = desc->layer_idx; +- object->info.texture.layer_count = desc->layer_count; ++ object->v.u.vk_image_view = vk_view; ++ object->v.format = format; ++ object->v.info.texture.vk_view_type = desc->view_type; ++ object->v.info.texture.miplevel_idx = desc->miplevel_idx; ++ object->v.info.texture.layer_idx = desc->layer_idx; ++ object->v.info.texture.layer_count = desc->layer_count; + *view = object; + return true; + } +@@ -2794,6 +2815,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, + struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc) + { + struct VkDescriptorBufferInfo *buffer_info; ++ struct vkd3d_cbuffer_desc *cb_desc; + struct d3d12_resource *resource; + + if (!desc) +@@ -2802,13 +2824,19 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, + return; + } + ++ if (!(cb_desc = vkd3d_cbuffer_desc_create(device))) ++ { ++ ERR("Failed to allocate descriptor object.\n"); ++ return; ++ } ++ + if (desc->SizeInBytes & (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1)) + { + WARN("Size is not %u bytes aligned.\n", D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + return; + } + +- buffer_info = &descriptor->s.u.vk_cbv_info; ++ buffer_info = &cb_desc->vk_cbv_info; + if (desc->BufferLocation) + { + resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, desc->BufferLocation); +@@ -2824,8 +2852,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, + buffer_info->range = VK_WHOLE_SIZE; + } + +- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_CBV; +- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; ++ descriptor->s.u.cb_desc = cb_desc; + } + + static unsigned int vkd3d_view_flags_from_d3d12_buffer_srv_flags(D3D12_BUFFER_SRV_FLAGS flags) +@@ -2842,7 +2869,6 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, + { + struct vkd3d_null_resources *null_resources = &device->null_resources; + struct vkd3d_texture_view_desc vkd3d_desc; +- struct vkd3d_view *view; + VkImage vk_image; + + if (!desc) +@@ -2857,15 +2883,9 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, + if (!device->vk_info.EXT_robustness2) + WARN("Creating NULL buffer SRV %#x.\n", desc->Format); + +- if (vkd3d_create_buffer_view(device, null_resources->vk_buffer, ++ vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, null_resources->vk_buffer, + vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), +- 0, VKD3D_NULL_BUFFER_SIZE, &view)) +- { +- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; +- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; +- descriptor->s.u.view_info.view = view; +- descriptor->s.u.view_info.written_serial_id = view->serial_id; +- } ++ 0, VKD3D_NULL_BUFFER_SIZE, &descriptor->s.u.view); + return; + + case D3D12_SRV_DIMENSION_TEXTURE2D: +@@ -2904,20 +2924,13 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, + vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_ZERO; + vkd3d_desc.allowed_swizzle = true; + +- if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) +- return; +- +- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; +- 
descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; +- descriptor->s.u.view_info.view = view; +- descriptor->s.u.view_info.written_serial_id = view->serial_id; ++ vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, vk_image, &vkd3d_desc, &descriptor->s.u.view); + } + + static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, + struct d3d12_device *device, struct d3d12_resource *resource, + const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) + { +- struct vkd3d_view *view; + unsigned int flags; + + if (!desc) +@@ -2933,15 +2946,9 @@ static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, + } + + flags = vkd3d_view_flags_from_d3d12_buffer_srv_flags(desc->u.Buffer.Flags); +- if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, ++ vkd3d_create_buffer_view_for_resource(device, VKD3D_DESCRIPTOR_MAGIC_SRV, resource, desc->Format, + desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, +- desc->u.Buffer.StructureByteStride, flags, &view)) +- return; +- +- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; +- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; +- descriptor->s.u.view_info.view = view; +- descriptor->s.u.view_info.written_serial_id = view->serial_id; ++ desc->u.Buffer.StructureByteStride, flags, &descriptor->s.u.view); + } + + static VkImageAspectFlags vk_image_aspect_flags_from_d3d12_plane_slice(const struct vkd3d_format *format, +@@ -2970,7 +2977,6 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, + const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) + { + struct vkd3d_texture_view_desc vkd3d_desc; +- struct vkd3d_view *view; + + if (!resource) + { +@@ -3002,6 +3008,11 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, + + switch (desc->ViewDimension) + { ++ case D3D12_SRV_DIMENSION_TEXTURE1D: ++ vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_1D; ++ vkd3d_desc.miplevel_idx = desc->u.Texture1D.MostDetailedMip; ++ vkd3d_desc.miplevel_count = desc->u.Texture1D.MipLevels; ++ break; + case D3D12_SRV_DIMENSION_TEXTURE2D: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; + vkd3d_desc.miplevel_idx = desc->u.Texture2D.MostDetailedMip; +@@ -3066,13 +3077,8 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, + } + } + +- if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) +- return; +- +- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; +- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; +- descriptor->s.u.view_info.view = view; +- descriptor->s.u.view_info.written_serial_id = view->serial_id; ++ vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, resource->u.vk_image, &vkd3d_desc, ++ &descriptor->s.u.view); + } + + static unsigned int vkd3d_view_flags_from_d3d12_buffer_uav_flags(D3D12_BUFFER_UAV_FLAGS flags) +@@ -3089,7 +3095,6 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, + { + struct vkd3d_null_resources *null_resources = &device->null_resources; + struct vkd3d_texture_view_desc vkd3d_desc; +- struct vkd3d_view *view; + VkImage vk_image; + + if (!desc) +@@ -3104,15 +3109,9 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, + if (!device->vk_info.EXT_robustness2) + WARN("Creating NULL buffer UAV %#x.\n", desc->Format); + +- if (vkd3d_create_buffer_view(device, null_resources->vk_storage_buffer, ++ vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, null_resources->vk_storage_buffer, + vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), +- 0, VKD3D_NULL_BUFFER_SIZE, &view)) +- 
{ +- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; +- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; +- descriptor->s.u.view_info.view = view; +- descriptor->s.u.view_info.written_serial_id = view->serial_id; +- } ++ 0, VKD3D_NULL_BUFFER_SIZE, &descriptor->s.u.view); + return; + + case D3D12_UAV_DIMENSION_TEXTURE2D: +@@ -3150,13 +3149,7 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, + vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_A; + vkd3d_desc.allowed_swizzle = false; + +- if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) +- return; +- +- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; +- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; +- descriptor->s.u.view_info.view = view; +- descriptor->s.u.view_info.written_serial_id = view->serial_id; ++ vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, vk_image, &vkd3d_desc, &descriptor->s.u.view); + } + + static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, +@@ -3179,16 +3172,11 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ + } + + flags = vkd3d_view_flags_from_d3d12_buffer_uav_flags(desc->u.Buffer.Flags); +- if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, ++ if (!vkd3d_create_buffer_view_for_resource(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource, desc->Format, + desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, + desc->u.Buffer.StructureByteStride, flags, &view)) + return; + +- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; +- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; +- descriptor->s.u.view_info.view = view; +- descriptor->s.u.view_info.written_serial_id = view->serial_id; +- + if (counter_resource) + { + const struct vkd3d_format *format; +@@ -3198,13 +3186,16 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ + + format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); + if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format, +- desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->vk_counter_view)) ++ desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->v.vk_counter_view)) + { + WARN("Failed to create counter buffer view.\n"); +- view->vk_counter_view = VK_NULL_HANDLE; +- d3d12_desc_destroy(descriptor, device); ++ view->v.vk_counter_view = VK_NULL_HANDLE; ++ vkd3d_view_decref(view, device); ++ return; + } + } ++ ++ descriptor->s.u.view = view; + } + + static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, +@@ -3212,7 +3203,6 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, + const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) + { + struct vkd3d_texture_view_desc vkd3d_desc; +- struct vkd3d_view *view; + + if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? 
desc->Format : 0)) + return; +@@ -3227,6 +3217,9 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, + { + switch (desc->ViewDimension) + { ++ case D3D12_UAV_DIMENSION_TEXTURE1D: ++ vkd3d_desc.miplevel_idx = desc->u.Texture1D.MipSlice; ++ break; + case D3D12_UAV_DIMENSION_TEXTURE2D: + vkd3d_desc.miplevel_idx = desc->u.Texture2D.MipSlice; + if (desc->u.Texture2D.PlaneSlice) +@@ -3257,13 +3250,8 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, + } + } + +- if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) +- return; +- +- descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; +- descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; +- descriptor->s.u.view_info.view = view; +- descriptor->s.u.view_info.written_serial_id = view->serial_id; ++ vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_image, &vkd3d_desc, ++ &descriptor->s.u.view); + } + + void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, +@@ -3291,12 +3279,26 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d + } + + bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, +- D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view) ++ D3D12_GPU_VIRTUAL_ADDRESS gpu_address, D3D12_ROOT_PARAMETER_TYPE parameter_type, VkBufferView *vk_buffer_view) + { + const struct vkd3d_format *format; + struct d3d12_resource *resource; + + format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); ++ ++ if (!gpu_address) ++ { ++ if (device->vk_info.EXT_robustness2) ++ { ++ *vk_buffer_view = VK_NULL_HANDLE; ++ return true; ++ } ++ WARN("Creating null buffer view.\n"); ++ return vkd3d_create_vk_buffer_view(device, parameter_type == D3D12_ROOT_PARAMETER_TYPE_UAV ++ ? 
device->null_resources.vk_storage_buffer : device->null_resources.vk_buffer, ++ format, 0, VK_WHOLE_SIZE, vk_buffer_view); ++ } ++ + resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address); + assert(d3d12_resource_is_buffer(resource)); + return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format, +@@ -3412,21 +3414,21 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, + FIXME("Ignoring border color {%.8e, %.8e, %.8e, %.8e}.\n", + desc->BorderColor[0], desc->BorderColor[1], desc->BorderColor[2], desc->BorderColor[3]); + +- if (!(view = vkd3d_view_create(VKD3D_VIEW_TYPE_SAMPLER))) ++ if (!(view = vkd3d_view_create(VKD3D_DESCRIPTOR_MAGIC_SAMPLER, VK_DESCRIPTOR_TYPE_SAMPLER, ++ VKD3D_VIEW_TYPE_SAMPLER, device))) + return; ++ view->v.u.vk_sampler = VK_NULL_HANDLE; ++ view->v.format = NULL; + + if (d3d12_create_sampler(device, desc->Filter, desc->AddressU, + desc->AddressV, desc->AddressW, desc->MipLODBias, desc->MaxAnisotropy, +- desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->u.vk_sampler) < 0) ++ desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->v.u.vk_sampler) < 0) + { +- vkd3d_free(view); ++ vkd3d_view_decref(view, device); + return; + } + +- sampler->s.magic = VKD3D_DESCRIPTOR_MAGIC_SAMPLER; +- sampler->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLER; +- sampler->s.u.view_info.view = view; +- sampler->s.u.view_info.written_serial_id = view->serial_id; ++ sampler->s.u.view = view; + } + + HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, +@@ -3448,7 +3450,7 @@ HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, + /* RTVs */ + static void d3d12_rtv_desc_destroy(struct d3d12_rtv_desc *rtv, struct d3d12_device *device) + { +- if (rtv->magic != VKD3D_DESCRIPTOR_MAGIC_RTV) ++ if (!rtv->view) + return; + + vkd3d_view_decref(rtv->view, device); +@@ -3527,10 +3529,9 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev + + assert(d3d12_resource_is_texture(resource)); + +- if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) ++ if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_RTV, resource->u.vk_image, &vkd3d_desc, &view)) + return; + +- rtv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_RTV; + rtv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); + rtv_desc->format = vkd3d_desc.format; + rtv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); +@@ -3543,7 +3544,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev + /* DSVs */ + static void d3d12_dsv_desc_destroy(struct d3d12_dsv_desc *dsv, struct d3d12_device *device) + { +- if (dsv->magic != VKD3D_DESCRIPTOR_MAGIC_DSV) ++ if (!dsv->view) + return; + + vkd3d_view_decref(dsv->view, device); +@@ -3612,10 +3613,9 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev + + assert(d3d12_resource_is_texture(resource)); + +- if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) ++ if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_DSV, resource->u.vk_image, &vkd3d_desc, &view)) + return; + +- dsv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_DSV; + dsv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); + dsv_desc->format = vkd3d_desc.format; + dsv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); +@@ -3883,7 +3883,6 @@ static HRESULT 
d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; + VkDescriptorSetAllocateInfo set_desc; +- unsigned int i; + VkResult vr; + + set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; +@@ -3897,8 +3896,7 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript + set_size.pDescriptorCounts = &variable_binding_size; + if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0) + { +- for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i) +- descriptor_set->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; ++ descriptor_set->vk_type = device->vk_descriptor_heap_layouts[set].type; + return S_OK; + } + +@@ -3914,7 +3912,6 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri + + descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; + memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets)); +- vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); + + if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) +@@ -3925,53 +3922,6 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri + + for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set) + { +- struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; +- unsigned int i; +- +- for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i) +- { +- descriptor_set->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; +- descriptor_set->vk_descriptor_writes[i].pNext = NULL; +- descriptor_set->vk_descriptor_writes[i].dstBinding = 0; +- descriptor_set->vk_descriptor_writes[i].descriptorType = device->vk_descriptor_heap_layouts[set].type; +- descriptor_set->vk_descriptor_writes[i].pImageInfo = NULL; +- descriptor_set->vk_descriptor_writes[i].pBufferInfo = NULL; +- descriptor_set->vk_descriptor_writes[i].pTexelBufferView = NULL; +- } +- switch (device->vk_descriptor_heap_layouts[set].type) +- { +- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: +- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: +- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: +- break; +- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: +- descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; +- for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) +- { +- descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE; +- descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; +- } +- break; +- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: +- descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; +- for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) +- { +- descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE; +- descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; +- } +- break; +- case VK_DESCRIPTOR_TYPE_SAMPLER: +- descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; +- for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) +- { +- descriptor_set->vk_image_infos[i].imageView = VK_NULL_HANDLE; +- descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; +- } +- break; +- default: +- 
ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type); +- return E_FAIL; +- } + if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type + && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, set))) + return hr; +@@ -3995,6 +3945,7 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript + return hr; + + d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); ++ vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); + + d3d12_device_add_ref(descriptor_heap->device = device); + +@@ -4047,7 +3998,9 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, + { + memset(&dst[i].s, 0, sizeof(dst[i].s)); + dst[i].index = i; ++ dst[i].next = 0; + } ++ object->dirty_list_head = UINT_MAX; + } + else + { +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index 77b795d6278..b0150754434 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -31,8 +31,8 @@ + #include "vkd3d_blob.h" + #include "vkd3d_memory.h" + #include "vkd3d_utf8.h" +-#include "wine/list.h" +-#include "wine/rbtree.h" ++#include "list.h" ++#include "rbtree.h" + + #include "vkd3d.h" + #include "vkd3d_shader.h" +@@ -44,13 +44,11 @@ + + #define VK_CALL(f) (vk_procs->f) + +-#define VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW 0x01000000u +- + #define VKD3D_DESCRIPTOR_MAGIC_FREE 0x00000000u + #define VKD3D_DESCRIPTOR_MAGIC_CBV VKD3D_MAKE_TAG('C', 'B', 'V', 0) +-#define VKD3D_DESCRIPTOR_MAGIC_SRV VKD3D_MAKE_TAG('S', 'R', 'V', 1) +-#define VKD3D_DESCRIPTOR_MAGIC_UAV VKD3D_MAKE_TAG('U', 'A', 'V', 1) +-#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER VKD3D_MAKE_TAG('S', 'M', 'P', 1) ++#define VKD3D_DESCRIPTOR_MAGIC_SRV VKD3D_MAKE_TAG('S', 'R', 'V', 0) ++#define VKD3D_DESCRIPTOR_MAGIC_UAV VKD3D_MAKE_TAG('U', 'A', 'V', 0) ++#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER VKD3D_MAKE_TAG('S', 'M', 'P', 0) + #define VKD3D_DESCRIPTOR_MAGIC_DSV VKD3D_MAKE_TAG('D', 'S', 'V', 0) + #define VKD3D_DESCRIPTOR_MAGIC_RTV VKD3D_MAKE_TAG('R', 'T', 'V', 0) + +@@ -252,6 +250,31 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) + { + } + ++static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) ++{ ++ return InterlockedDecrement((LONG volatile *)x); ++} ++ ++static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) ++{ ++ return InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp; ++} ++ ++static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) ++{ ++ return InterlockedExchange((LONG volatile *)x, val); ++} ++ ++static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) ++{ ++ return InterlockedCompareExchangePointer(x, xchg, cmp) == cmp; ++} ++ ++static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) ++{ ++ return InterlockedExchangePointer(x, val); ++} ++ + #else /* _WIN32 */ + + #include +@@ -354,6 +377,63 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) + ERR("Could not destroy the condition variable, error %d.\n", ret); + } + ++# if HAVE_SYNC_SUB_AND_FETCH ++static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) ++{ ++ return __sync_sub_and_fetch(x, 1); ++} ++# else ++# error "vkd3d_atomic_decrement() not implemented for this platform" ++# endif /* HAVE_SYNC_ADD_AND_FETCH */ ++ ++# if HAVE_SYNC_BOOL_COMPARE_AND_SWAP ++static 
inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) ++{ ++ return __sync_bool_compare_and_swap(x, cmp, xchg); ++} ++ ++static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) ++{ ++ return __sync_bool_compare_and_swap(x, cmp, xchg); ++} ++# else ++# error "vkd3d_atomic_compare_exchange() not implemented for this platform" ++# endif ++ ++# if HAVE_ATOMIC_EXCHANGE_N ++static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) ++{ ++ return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); ++} ++ ++static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) ++{ ++ return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); ++} ++# elif HAVE_SYNC_BOOL_COMPARE_AND_SWAP ++static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) ++{ ++ unsigned int i; ++ do ++ { ++ i = *x; ++ } while (!__sync_bool_compare_and_swap(x, i, val)); ++ return i; ++} ++ ++static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) ++{ ++ void *p; ++ do ++ { ++ p = *x; ++ } while (!__sync_bool_compare_and_swap(x, p, val)); ++ return p; ++} ++# else ++# error "vkd3d_atomic_exchange() not implemented for this platform" ++# endif ++ + #endif /* _WIN32 */ + + HRESULT vkd3d_create_thread(struct vkd3d_instance *instance, +@@ -563,6 +643,7 @@ struct d3d12_heap + { + ID3D12Heap ID3D12Heap_iface; + LONG refcount; ++ LONG resource_count; + + bool is_private; + D3D12_HEAP_DESC desc; +@@ -661,11 +742,9 @@ enum vkd3d_view_type + VKD3D_VIEW_TYPE_SAMPLER, + }; + +-struct vkd3d_view ++struct vkd3d_resource_view + { +- LONG refcount; + enum vkd3d_view_type type; +- uint64_t serial_id; + union + { + VkBufferView vk_buffer_view; +@@ -691,9 +770,6 @@ struct vkd3d_view + } info; + }; + +-void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device); +-void vkd3d_view_incref(struct vkd3d_view *view); +- + struct vkd3d_texture_view_desc + { + VkImageViewType view_type; +@@ -707,32 +783,88 @@ struct vkd3d_texture_view_desc + bool allowed_swizzle; + }; + +-bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, +- VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view); +-bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, ++struct vkd3d_desc_header ++{ ++ uint32_t magic; ++ unsigned int volatile refcount; ++ void *next; ++ VkDescriptorType vk_descriptor_type; ++}; ++ ++struct vkd3d_view ++{ ++ struct vkd3d_desc_header h; ++ struct vkd3d_resource_view v; ++}; ++ ++bool vkd3d_create_buffer_view(struct d3d12_device *device, uint32_t magic, VkBuffer vk_buffer, ++ const struct vkd3d_format *format, VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view); ++bool vkd3d_create_texture_view(struct d3d12_device *device, uint32_t magic, VkImage vk_image, + const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view); + +-struct vkd3d_view_info ++struct vkd3d_cbuffer_desc + { +- uint64_t written_serial_id; +- struct vkd3d_view *view; ++ struct vkd3d_desc_header h; ++ VkDescriptorBufferInfo vk_cbv_info; + }; + + struct d3d12_desc + { + struct + { +- uint32_t magic; +- VkDescriptorType vk_descriptor_type; +- union ++ union d3d12_desc_object + { +- VkDescriptorBufferInfo vk_cbv_info; +- struct vkd3d_view_info view_info; ++ struct vkd3d_desc_header *header; ++ struct vkd3d_view *view; ++ struct vkd3d_cbuffer_desc *cb_desc; ++ void 
*object;
+ } u;
+ } s;
+ unsigned int index;
++ unsigned int next;
+ };
+
++void vkd3d_view_decref(void *view, struct d3d12_device *device);
++
++static inline bool vkd3d_view_incref(void *desc)
++{
++ struct vkd3d_desc_header *h = desc;
++ unsigned int refcount;
++
++ do
++ {
++ refcount = h->refcount;
++ /* Avoid incrementing a freed object. Reading the value is safe because objects are recycled. */
++ if (refcount <= 0)
++ return false;
++ }
++ while (!vkd3d_atomic_compare_exchange(&h->refcount, refcount, refcount + 1));
++
++ return true;
++}
++
++static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc *src, struct d3d12_device *device)
++{
++ void *view;
++
++ /* Some games, e.g. Shadow of the Tomb Raider, GRID 2019, and Horizon Zero Dawn, write descriptors
++ * from multiple threads without synchronisation. This is apparently valid in Windows. */
++ for (;;)
++ {
++ do
++ {
++ view = src->s.u.object;
++ } while (view && !vkd3d_view_incref(view));
++
++ /* Check if the object is still in src to handle the case where it was
++ * already freed and reused elsewhere when the refcount was incremented. */
++ if (view == src->s.u.object)
++ return view;
++
++ vkd3d_view_decref(view, device);
++ }
++}
++
+ static inline struct d3d12_desc *d3d12_desc_from_cpu_handle(D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
+ {
+ return (struct d3d12_desc *)cpu_handle.ptr;
+@@ -761,13 +893,12 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device *
+ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device);
+
+ bool vkd3d_create_raw_buffer_view(struct d3d12_device *device,
+- D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view);
++ D3D12_GPU_VIRTUAL_ADDRESS gpu_address, D3D12_ROOT_PARAMETER_TYPE parameter_type, VkBufferView *vk_buffer_view);
+ HRESULT vkd3d_create_static_sampler(struct d3d12_device *device,
+ const D3D12_STATIC_SAMPLER_DESC *desc, VkSampler *vk_sampler);
+
+ struct d3d12_rtv_desc
+ {
+- uint32_t magic;
+ VkSampleCountFlagBits sample_count;
+ const struct vkd3d_format *format;
+ uint64_t width;
+@@ -787,7 +918,6 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev
+
+ struct d3d12_dsv_desc
+ {
+- uint32_t magic;
+ VkSampleCountFlagBits sample_count;
+ const struct vkd3d_format *format;
+ uint64_t width;
+@@ -837,15 +967,10 @@ struct vkd3d_vk_descriptor_heap_layout
+ VkDescriptorSetLayout vk_set_layout;
+ };
+
+-#define VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE 64
+-
+ struct d3d12_descriptor_heap_vk_set
+ {
+ VkDescriptorSet vk_set;
+- VkDescriptorBufferInfo vk_buffer_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
+- VkBufferView vk_buffer_views[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
+- VkDescriptorImageInfo vk_image_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
+- VkWriteDescriptorSet vk_descriptor_writes[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE];
++ VkDescriptorType vk_type;
+ };
+
+ /* ID3D12DescriptorHeap */
+@@ -865,9 +990,13 @@ struct d3d12_descriptor_heap
+ struct d3d12_descriptor_heap_vk_set vk_descriptor_sets[VKD3D_SET_INDEX_COUNT];
+ struct vkd3d_mutex vk_sets_mutex;
+
+- BYTE descriptors[];
++ unsigned int volatile dirty_list_head;
++
++ uint8_t DECLSPEC_ALIGN(sizeof(void *)) descriptors[];
+ };
+
++void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device);
++
+ static inline struct d3d12_descriptor_heap *d3d12_desc_get_descriptor_heap(const struct d3d12_desc *descriptor)
+ {
+ return 
CONTAINING_RECORD(descriptor - descriptor->index, struct d3d12_descriptor_heap, descriptors);
+@@ -882,22 +1011,6 @@ static inline unsigned int d3d12_desc_heap_range_size(const struct d3d12_desc *d
+ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device,
+ const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap);
+
+-struct d3d12_desc_copy_location
+-{
+- struct d3d12_desc src;
+- struct d3d12_desc *dst;
+-};
+-
+-struct d3d12_desc_copy_info
+-{
+- unsigned int count;
+- bool uav_counter;
+-};
+-
+-void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info,
+- struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set,
+- struct d3d12_device *device);
+-
+ /* ID3D12QueryHeap */
+ struct d3d12_query_heap
+ {
+@@ -1295,6 +1408,8 @@ struct d3d12_command_list
+ VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT];
+
+ void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point);
++ struct d3d12_descriptor_heap *descriptor_heaps[64];
++ unsigned int descriptor_heap_count;
+
+ struct vkd3d_private_store private_store;
+ };
+@@ -1485,6 +1600,12 @@ struct vkd3d_uav_clear_state
+ HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);
+ void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);
+
++struct vkd3d_desc_object_cache
++{
++ void * volatile head;
++ size_t size;
++};
++
+ #define VKD3D_DESCRIPTOR_POOL_COUNT 6
+
+ /* ID3D12Device */
+@@ -1502,7 +1623,8 @@ struct d3d12_device
+ struct vkd3d_gpu_va_allocator gpu_va_allocator;
+
+ struct vkd3d_mutex mutex;
+- struct vkd3d_mutex desc_mutex[8];
++ struct vkd3d_desc_object_cache view_desc_cache;
++ struct vkd3d_desc_object_cache cbuffer_desc_cache;
+ struct vkd3d_render_pass_cache render_pass_cache;
+ VkPipelineCache vk_pipeline_cache;
+
+@@ -1544,6 +1666,7 @@ struct d3d12_device
+ struct vkd3d_uav_clear_state uav_clear_state;
+
+ VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT];
++ unsigned int vk_pool_count;
+ struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT];
+ bool use_vk_heaps;
+ };
+@@ -1577,19 +1700,6 @@ static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(str
+ return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type);
+ }
+
+-static inline struct vkd3d_mutex *d3d12_device_get_descriptor_mutex(struct d3d12_device *device,
+- const struct d3d12_desc *descriptor)
+-{
+- STATIC_ASSERT(!(ARRAY_SIZE(device->desc_mutex) & (ARRAY_SIZE(device->desc_mutex) - 1)));
+- uintptr_t idx = (uintptr_t)descriptor;
+-
+- idx ^= idx >> 12;
+- idx ^= idx >> 6;
+- idx ^= idx >> 3;
+-
+- return &device->desc_mutex[idx & (ARRAY_SIZE(device->desc_mutex) - 1)];
+-}
+-
+ /* utils */
+ enum vkd3d_format_type
+ {
+--
+2.40.1
+
diff --git a/patches/vkd3d-latest/definition b/patches/vkd3d-latest/definition
new file mode 100644
index 00000000..c330a4dd
--- /dev/null
+++ b/patches/vkd3d-latest/definition
@@ -0,0 +1,7 @@
+# Update vkd3d to the latest to allow testing before it is
+# finally integrated into wine.
+# Bugs for this patchset should be fixed in the usual place, against vkd3d
+
+# Games used for testing
+# Stranded Alien Dawn - Requires dxvk
+# DOOM Eternal
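
The d3d12_desc_get_object_ref()/vkd3d_view_incref() helpers added by patch 0002 implement a lock-free "increment the refcount only while it is still non-zero, then re-check the descriptor slot" protocol, so a concurrent reader can never resurrect an object that has already been freed and recycled. The following is a minimal standalone C sketch of just that alive-check CAS loop, assuming the GCC/Clang __sync builtins that the patch's non-Win32 path relies on; the object_t, object_try_incref() and object_decref() names are hypothetical and exist only for illustration, they are not part of vkd3d or Wine.

#include <stdbool.h>
#include <stdio.h>

typedef struct object
{
    /* A refcount of 0 means the object has been freed (and may be recycled). */
    unsigned int volatile refcount;
} object_t; /* Hypothetical; stands in for struct vkd3d_desc_header. */

/* Take a reference only if the object is still alive, mirroring the
 * CAS loop in vkd3d_view_incref(). */
static bool object_try_incref(object_t *o)
{
    unsigned int refcount;

    do
    {
        refcount = o->refcount;
        if (!refcount)
            return false; /* Already freed; the caller must re-read the slot. */
    } while (!__sync_bool_compare_and_swap(&o->refcount, refcount, refcount + 1));

    return true;
}

static void object_decref(object_t *o)
{
    __sync_sub_and_fetch(&o->refcount, 1);
}

int main(void)
{
    object_t o = { .refcount = 1 };

    if (object_try_incref(&o))
        printf("acquired, refcount now %u\n", o.refcount); /* prints 2 */
    object_decref(&o);
    object_decref(&o); /* Drops to 0; the object is now conceptually freed. */
    printf("acquired after free: %d\n", object_try_incref(&o)); /* prints 0 */
    return 0;
}

In the real code this primitive is only half the story: d3d12_desc_get_object_ref() additionally re-reads src->s.u.object after the increment succeeds and releases the reference if the slot changed, which is what makes unsynchronised descriptor writes from game threads safe.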