From ffc72a34adff66a7e763f682288f95498b8b5f28 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 17 May 2023 08:35:40 +1000 Subject: [PATCH] Update vkd3d to 771e442af16228a977eebba82224f06f6d0202fe (1.8) --- libs/vkd3d/Makefile.in | 6 +- libs/vkd3d/include/list.h | 270 + libs/vkd3d/include/private/list.h | 270 + libs/vkd3d/include/private/rbtree.h | 378 ++ libs/vkd3d/include/private/vkd3d_common.h | 3 +- libs/vkd3d/include/private/vkd3d_debug.h | 2 +- libs/vkd3d/include/private/vkd3d_test.h | 432 ++ libs/vkd3d/include/vkd3d.h | 1 + libs/vkd3d/include/vkd3d_d3d9types.h | 237 + libs/vkd3d/include/vkd3d_d3dcompiler.h | 74 + libs/vkd3d/include/vkd3d_shader.h | 1 + libs/vkd3d/include/vkd3d_utils.h | 108 + libs/vkd3d/include/vkd3d_windows.h | 284 + libs/vkd3d/libs/vkd3d-common/blob.c | 1 + libs/vkd3d/libs/vkd3d-common/debug.c | 4 +- .../libs/vkd3d-shader/{trace.c => d3d_asm.c} | 25 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1131 +++- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 1773 +----- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 619 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 235 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 8 + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 2362 +++++--- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1821 ++++-- .../libs/vkd3d-shader/hlsl_constant_ops.c | 358 +- libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c | 980 --- libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c | 2531 -------- libs/vkd3d/libs/vkd3d-shader/ir.c | 1072 ++++ libs/vkd3d/libs/vkd3d-shader/preproc.h | 2 +- libs/vkd3d/libs/vkd3d-shader/preproc.l | 6 +- libs/vkd3d/libs/vkd3d-shader/spirv.c | 1325 ++--- libs/vkd3d/libs/vkd3d-shader/tpf.c | 5234 +++++++++++++++++ .../libs/vkd3d-shader/vkd3d_shader_main.c | 241 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 91 +- libs/vkd3d/libs/vkd3d/command.c | 160 +- libs/vkd3d/libs/vkd3d/device.c | 205 +- libs/vkd3d/libs/vkd3d/resource.c | 935 ++- libs/vkd3d/libs/vkd3d/state.c | 4 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 238 +- 38 files changed, 14623 
insertions(+), 8804 deletions(-) create mode 100644 libs/vkd3d/include/list.h create mode 100644 libs/vkd3d/include/private/list.h create mode 100644 libs/vkd3d/include/private/rbtree.h create mode 100644 libs/vkd3d/include/private/vkd3d_test.h create mode 100644 libs/vkd3d/include/vkd3d_d3d9types.h create mode 100644 libs/vkd3d/include/vkd3d_d3dcompiler.h create mode 100644 libs/vkd3d/include/vkd3d_utils.h create mode 100644 libs/vkd3d/include/vkd3d_windows.h rename libs/vkd3d/libs/vkd3d-shader/{trace.c => d3d_asm.c} (98%) delete mode 100644 libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c delete mode 100644 libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c create mode 100644 libs/vkd3d/libs/vkd3d-shader/ir.c create mode 100644 libs/vkd3d/libs/vkd3d-shader/tpf.c diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in index 0ed4e27ad83..1ba0e9f71e1 100644 --- a/libs/vkd3d/Makefile.in +++ b/libs/vkd3d/Makefile.in @@ -14,6 +14,7 @@ SOURCES = \ libs/vkd3d-common/memory.c \ libs/vkd3d-common/utf8.c \ libs/vkd3d-shader/checksum.c \ + libs/vkd3d-shader/d3d_asm.c \ libs/vkd3d-shader/d3dbc.c \ libs/vkd3d-shader/dxbc.c \ libs/vkd3d-shader/glsl.c \ @@ -22,12 +23,11 @@ SOURCES = \ libs/vkd3d-shader/hlsl.y \ libs/vkd3d-shader/hlsl_codegen.c \ libs/vkd3d-shader/hlsl_constant_ops.c \ - libs/vkd3d-shader/hlsl_sm1.c \ - libs/vkd3d-shader/hlsl_sm4.c \ + libs/vkd3d-shader/ir.c \ libs/vkd3d-shader/preproc.l \ libs/vkd3d-shader/preproc.y \ libs/vkd3d-shader/spirv.c \ - libs/vkd3d-shader/trace.c \ + libs/vkd3d-shader/tpf.c \ libs/vkd3d-shader/vkd3d_shader_main.c \ libs/vkd3d/command.c \ libs/vkd3d/device.c \ diff --git a/libs/vkd3d/include/list.h b/libs/vkd3d/include/list.h new file mode 100644 index 00000000000..2e1d95f3fd4 --- /dev/null +++ b/libs/vkd3d/include/list.h @@ -0,0 +1,270 @@ +/* + * Linked lists support + * + * Copyright (C) 2002 Alexandre Julliard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * 
License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __WINE_SERVER_LIST_H +#define __WINE_SERVER_LIST_H + +#include <stddef.h> + +struct list +{ + struct list *next; + struct list *prev; +}; + +/* Define a list like so: + * + * struct gadget + * { + * struct list entry; <-- doesn't have to be the first item in the struct + * int a, b; + * }; + * + * static struct list global_gadgets = LIST_INIT( global_gadgets ); + * + * or + * + * struct some_global_thing + * { + * struct list gadgets; + * }; + * + * list_init( &some_global_thing->gadgets ); + * + * Manipulate it like this: + * + * list_add_head( &global_gadgets, &new_gadget->entry ); + * list_remove( &new_gadget->entry ); + * list_add_after( &some_random_gadget->entry, &new_gadget->entry ); + * + * And to iterate over it: + * + * struct gadget *gadget; + * LIST_FOR_EACH_ENTRY( gadget, &global_gadgets, struct gadget, entry ) + * { + * ... 
+ * } + * + */ + +/* add an element after the specified one */ +static inline void list_add_after( struct list *elem, struct list *to_add ) +{ + to_add->next = elem->next; + to_add->prev = elem; + elem->next->prev = to_add; + elem->next = to_add; +} + +/* add an element before the specified one */ +static inline void list_add_before( struct list *elem, struct list *to_add ) +{ + to_add->next = elem; + to_add->prev = elem->prev; + elem->prev->next = to_add; + elem->prev = to_add; +} + +/* add element at the head of the list */ +static inline void list_add_head( struct list *list, struct list *elem ) +{ + list_add_after( list, elem ); +} + +/* add element at the tail of the list */ +static inline void list_add_tail( struct list *list, struct list *elem ) +{ + list_add_before( list, elem ); +} + +/* remove an element from its list */ +static inline void list_remove( struct list *elem ) +{ + elem->next->prev = elem->prev; + elem->prev->next = elem->next; +} + +/* get the next element */ +static inline struct list *list_next( const struct list *list, const struct list *elem ) +{ + struct list *ret = elem->next; + if (elem->next == list) ret = NULL; + return ret; +} + +/* get the previous element */ +static inline struct list *list_prev( const struct list *list, const struct list *elem ) +{ + struct list *ret = elem->prev; + if (elem->prev == list) ret = NULL; + return ret; +} + +/* get the first element */ +static inline struct list *list_head( const struct list *list ) +{ + return list_next( list, list ); +} + +/* get the last element */ +static inline struct list *list_tail( const struct list *list ) +{ + return list_prev( list, list ); +} + +/* check if a list is empty */ +static inline int list_empty( const struct list *list ) +{ + return list->next == list; +} + +/* initialize a list */ +static inline void list_init( struct list *list ) +{ + list->next = list->prev = list; +} + +/* count the elements of a list */ +static inline unsigned int list_count( const struct 
list *list ) +{ + unsigned count = 0; + const struct list *ptr; + for (ptr = list->next; ptr != list; ptr = ptr->next) count++; + return count; +} + +/* move all elements from src to before the specified element */ +static inline void list_move_before( struct list *dst, struct list *src ) +{ + if (list_empty(src)) return; + + dst->prev->next = src->next; + src->next->prev = dst->prev; + dst->prev = src->prev; + src->prev->next = dst; + list_init(src); +} + +/* move all elements from src to after the specified element */ +static inline void list_move_after( struct list *dst, struct list *src ) +{ + if (list_empty(src)) return; + + dst->next->prev = src->prev; + src->prev->next = dst->next; + dst->next = src->next; + src->next->prev = dst; + list_init(src); +} + +/* move all elements from src to the head of dst */ +static inline void list_move_head( struct list *dst, struct list *src ) +{ + list_move_after( dst, src ); +} + +/* move all elements from src to the tail of dst */ +static inline void list_move_tail( struct list *dst, struct list *src ) +{ + list_move_before( dst, src ); +} + +/* move the slice of elements from begin to end inclusive to the head of dst */ +static inline void list_move_slice_head( struct list *dst, struct list *begin, struct list *end ) +{ + struct list *dst_next = dst->next; + begin->prev->next = end->next; + end->next->prev = begin->prev; + dst->next = begin; + dst_next->prev = end; + begin->prev = dst; + end->next = dst_next; +} + +/* move the slice of elements from begin to end inclusive to the tail of dst */ +static inline void list_move_slice_tail( struct list *dst, struct list *begin, struct list *end ) +{ + struct list *dst_prev = dst->prev; + begin->prev->next = end->next; + end->next->prev = begin->prev; + dst_prev->next = begin; + dst->prev = end; + begin->prev = dst_prev; + end->next = dst; +} + +/* iterate through the list */ +#define LIST_FOR_EACH(cursor,list) \ + for ((cursor) = (list)->next; (cursor) != (list); (cursor) = 
(cursor)->next) + +/* iterate through the list, with safety against removal */ +#define LIST_FOR_EACH_SAFE(cursor, cursor2, list) \ + for ((cursor) = (list)->next, (cursor2) = (cursor)->next; \ + (cursor) != (list); \ + (cursor) = (cursor2), (cursor2) = (cursor)->next) + +/* iterate through the list using a list entry */ +#define LIST_FOR_EACH_ENTRY(elem, list, type, field) \ + for ((elem) = LIST_ENTRY((list)->next, type, field); \ + &(elem)->field != (list); \ + (elem) = LIST_ENTRY((elem)->field.next, type, field)) + +/* iterate through the list using a list entry, with safety against removal */ +#define LIST_FOR_EACH_ENTRY_SAFE(cursor, cursor2, list, type, field) \ + for ((cursor) = LIST_ENTRY((list)->next, type, field), \ + (cursor2) = LIST_ENTRY((cursor)->field.next, type, field); \ + &(cursor)->field != (list); \ + (cursor) = (cursor2), \ + (cursor2) = LIST_ENTRY((cursor)->field.next, type, field)) + +/* iterate through the list in reverse order */ +#define LIST_FOR_EACH_REV(cursor,list) \ + for ((cursor) = (list)->prev; (cursor) != (list); (cursor) = (cursor)->prev) + +/* iterate through the list in reverse order, with safety against removal */ +#define LIST_FOR_EACH_SAFE_REV(cursor, cursor2, list) \ + for ((cursor) = (list)->prev, (cursor2) = (cursor)->prev; \ + (cursor) != (list); \ + (cursor) = (cursor2), (cursor2) = (cursor)->prev) + +/* iterate through the list in reverse order using a list entry */ +#define LIST_FOR_EACH_ENTRY_REV(elem, list, type, field) \ + for ((elem) = LIST_ENTRY((list)->prev, type, field); \ + &(elem)->field != (list); \ + (elem) = LIST_ENTRY((elem)->field.prev, type, field)) + +/* iterate through the list in reverse order using a list entry, with safety against removal */ +#define LIST_FOR_EACH_ENTRY_SAFE_REV(cursor, cursor2, list, type, field) \ + for ((cursor) = LIST_ENTRY((list)->prev, type, field), \ + (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field); \ + &(cursor)->field != (list); \ + (cursor) = (cursor2), \ + 
(cursor2) = LIST_ENTRY((cursor)->field.prev, type, field)) + +/* macros for statically initialized lists */ +#undef LIST_INIT +#define LIST_INIT(list) { &(list), &(list) } + +/* get pointer to object containing list element */ +#undef LIST_ENTRY +#define LIST_ENTRY(elem, type, field) \ + ((type *)((char *)(elem) - offsetof(type, field))) + +#endif /* __WINE_SERVER_LIST_H */ diff --git a/libs/vkd3d/include/private/list.h b/libs/vkd3d/include/private/list.h new file mode 100644 index 00000000000..2e1d95f3fd4 --- /dev/null +++ b/libs/vkd3d/include/private/list.h @@ -0,0 +1,270 @@ +/* + * Linked lists support + * + * Copyright (C) 2002 Alexandre Julliard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __WINE_SERVER_LIST_H +#define __WINE_SERVER_LIST_H + +#include <stddef.h> + +struct list +{ + struct list *next; + struct list *prev; +}; + +/* Define a list like so: + * + * struct gadget + * { + * struct list entry; <-- doesn't have to be the first item in the struct + * int a, b; + * }; + * + * static struct list global_gadgets = LIST_INIT( global_gadgets ); + * + * or + * + * struct some_global_thing + * { + * struct list gadgets; + * }; + * + * list_init( &some_global_thing->gadgets ); + * + * Manipulate it like this: + * + * list_add_head( &global_gadgets, &new_gadget->entry ); + * list_remove( &new_gadget->entry ); + * list_add_after( &some_random_gadget->entry, &new_gadget->entry ); + * + * And to iterate over it: + * + * struct gadget *gadget; + * LIST_FOR_EACH_ENTRY( gadget, &global_gadgets, struct gadget, entry ) + * { + * ... 
+ * } + * + */ + +/* add an element after the specified one */ +static inline void list_add_after( struct list *elem, struct list *to_add ) +{ + to_add->next = elem->next; + to_add->prev = elem; + elem->next->prev = to_add; + elem->next = to_add; +} + +/* add an element before the specified one */ +static inline void list_add_before( struct list *elem, struct list *to_add ) +{ + to_add->next = elem; + to_add->prev = elem->prev; + elem->prev->next = to_add; + elem->prev = to_add; +} + +/* add element at the head of the list */ +static inline void list_add_head( struct list *list, struct list *elem ) +{ + list_add_after( list, elem ); +} + +/* add element at the tail of the list */ +static inline void list_add_tail( struct list *list, struct list *elem ) +{ + list_add_before( list, elem ); +} + +/* remove an element from its list */ +static inline void list_remove( struct list *elem ) +{ + elem->next->prev = elem->prev; + elem->prev->next = elem->next; +} + +/* get the next element */ +static inline struct list *list_next( const struct list *list, const struct list *elem ) +{ + struct list *ret = elem->next; + if (elem->next == list) ret = NULL; + return ret; +} + +/* get the previous element */ +static inline struct list *list_prev( const struct list *list, const struct list *elem ) +{ + struct list *ret = elem->prev; + if (elem->prev == list) ret = NULL; + return ret; +} + +/* get the first element */ +static inline struct list *list_head( const struct list *list ) +{ + return list_next( list, list ); +} + +/* get the last element */ +static inline struct list *list_tail( const struct list *list ) +{ + return list_prev( list, list ); +} + +/* check if a list is empty */ +static inline int list_empty( const struct list *list ) +{ + return list->next == list; +} + +/* initialize a list */ +static inline void list_init( struct list *list ) +{ + list->next = list->prev = list; +} + +/* count the elements of a list */ +static inline unsigned int list_count( const struct 
list *list ) +{ + unsigned count = 0; + const struct list *ptr; + for (ptr = list->next; ptr != list; ptr = ptr->next) count++; + return count; +} + +/* move all elements from src to before the specified element */ +static inline void list_move_before( struct list *dst, struct list *src ) +{ + if (list_empty(src)) return; + + dst->prev->next = src->next; + src->next->prev = dst->prev; + dst->prev = src->prev; + src->prev->next = dst; + list_init(src); +} + +/* move all elements from src to after the specified element */ +static inline void list_move_after( struct list *dst, struct list *src ) +{ + if (list_empty(src)) return; + + dst->next->prev = src->prev; + src->prev->next = dst->next; + dst->next = src->next; + src->next->prev = dst; + list_init(src); +} + +/* move all elements from src to the head of dst */ +static inline void list_move_head( struct list *dst, struct list *src ) +{ + list_move_after( dst, src ); +} + +/* move all elements from src to the tail of dst */ +static inline void list_move_tail( struct list *dst, struct list *src ) +{ + list_move_before( dst, src ); +} + +/* move the slice of elements from begin to end inclusive to the head of dst */ +static inline void list_move_slice_head( struct list *dst, struct list *begin, struct list *end ) +{ + struct list *dst_next = dst->next; + begin->prev->next = end->next; + end->next->prev = begin->prev; + dst->next = begin; + dst_next->prev = end; + begin->prev = dst; + end->next = dst_next; +} + +/* move the slice of elements from begin to end inclusive to the tail of dst */ +static inline void list_move_slice_tail( struct list *dst, struct list *begin, struct list *end ) +{ + struct list *dst_prev = dst->prev; + begin->prev->next = end->next; + end->next->prev = begin->prev; + dst_prev->next = begin; + dst->prev = end; + begin->prev = dst_prev; + end->next = dst; +} + +/* iterate through the list */ +#define LIST_FOR_EACH(cursor,list) \ + for ((cursor) = (list)->next; (cursor) != (list); (cursor) = 
(cursor)->next) + +/* iterate through the list, with safety against removal */ +#define LIST_FOR_EACH_SAFE(cursor, cursor2, list) \ + for ((cursor) = (list)->next, (cursor2) = (cursor)->next; \ + (cursor) != (list); \ + (cursor) = (cursor2), (cursor2) = (cursor)->next) + +/* iterate through the list using a list entry */ +#define LIST_FOR_EACH_ENTRY(elem, list, type, field) \ + for ((elem) = LIST_ENTRY((list)->next, type, field); \ + &(elem)->field != (list); \ + (elem) = LIST_ENTRY((elem)->field.next, type, field)) + +/* iterate through the list using a list entry, with safety against removal */ +#define LIST_FOR_EACH_ENTRY_SAFE(cursor, cursor2, list, type, field) \ + for ((cursor) = LIST_ENTRY((list)->next, type, field), \ + (cursor2) = LIST_ENTRY((cursor)->field.next, type, field); \ + &(cursor)->field != (list); \ + (cursor) = (cursor2), \ + (cursor2) = LIST_ENTRY((cursor)->field.next, type, field)) + +/* iterate through the list in reverse order */ +#define LIST_FOR_EACH_REV(cursor,list) \ + for ((cursor) = (list)->prev; (cursor) != (list); (cursor) = (cursor)->prev) + +/* iterate through the list in reverse order, with safety against removal */ +#define LIST_FOR_EACH_SAFE_REV(cursor, cursor2, list) \ + for ((cursor) = (list)->prev, (cursor2) = (cursor)->prev; \ + (cursor) != (list); \ + (cursor) = (cursor2), (cursor2) = (cursor)->prev) + +/* iterate through the list in reverse order using a list entry */ +#define LIST_FOR_EACH_ENTRY_REV(elem, list, type, field) \ + for ((elem) = LIST_ENTRY((list)->prev, type, field); \ + &(elem)->field != (list); \ + (elem) = LIST_ENTRY((elem)->field.prev, type, field)) + +/* iterate through the list in reverse order using a list entry, with safety against removal */ +#define LIST_FOR_EACH_ENTRY_SAFE_REV(cursor, cursor2, list, type, field) \ + for ((cursor) = LIST_ENTRY((list)->prev, type, field), \ + (cursor2) = LIST_ENTRY((cursor)->field.prev, type, field); \ + &(cursor)->field != (list); \ + (cursor) = (cursor2), \ + 
(cursor2) = LIST_ENTRY((cursor)->field.prev, type, field)) + +/* macros for statically initialized lists */ +#undef LIST_INIT +#define LIST_INIT(list) { &(list), &(list) } + +/* get pointer to object containing list element */ +#undef LIST_ENTRY +#define LIST_ENTRY(elem, type, field) \ + ((type *)((char *)(elem) - offsetof(type, field))) + +#endif /* __WINE_SERVER_LIST_H */ diff --git a/libs/vkd3d/include/private/rbtree.h b/libs/vkd3d/include/private/rbtree.h new file mode 100644 index 00000000000..b5d38bca54c --- /dev/null +++ b/libs/vkd3d/include/private/rbtree.h @@ -0,0 +1,378 @@ +/* + * Red-black search tree support + * + * Copyright 2009 Henri Verbeet + * Copyright 2009 Andrew Riedi + * Copyright 2016 Jacek Caban for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __WINE_WINE_RBTREE_H +#define __WINE_WINE_RBTREE_H + +#define RB_ENTRY_VALUE(element, type, field) \ + ((type *)((char *)(element) - offsetof(type, field))) + +struct rb_entry +{ + struct rb_entry *parent; + struct rb_entry *left; + struct rb_entry *right; + unsigned int flags; +}; + +typedef int (*rb_compare_func)(const void *key, const struct rb_entry *entry); + +struct rb_tree +{ + rb_compare_func compare; + struct rb_entry *root; +}; + +typedef void (rb_traverse_func)(struct rb_entry *entry, void *context); + +#define RB_FLAG_RED 0x1 + +static inline int rb_is_red(struct rb_entry *entry) +{ + return entry && (entry->flags & RB_FLAG_RED); +} + +static inline void rb_rotate_left(struct rb_tree *tree, struct rb_entry *e) +{ + struct rb_entry *right = e->right; + + if (!e->parent) + tree->root = right; + else if (e->parent->left == e) + e->parent->left = right; + else + e->parent->right = right; + + e->right = right->left; + if (e->right) e->right->parent = e; + right->left = e; + right->parent = e->parent; + e->parent = right; +} + +static inline void rb_rotate_right(struct rb_tree *tree, struct rb_entry *e) +{ + struct rb_entry *left = e->left; + + if (!e->parent) + tree->root = left; + else if (e->parent->left == e) + e->parent->left = left; + else + e->parent->right = left; + + e->left = left->right; + if (e->left) e->left->parent = e; + left->right = e; + left->parent = e->parent; + e->parent = left; +} + +static inline void rb_flip_color(struct rb_entry *entry) +{ + entry->flags ^= RB_FLAG_RED; + entry->left->flags ^= RB_FLAG_RED; + entry->right->flags ^= RB_FLAG_RED; +} + +static inline struct rb_entry *rb_head(struct rb_entry *iter) +{ + if (!iter) return NULL; + while (iter->left) iter = iter->left; + return iter; +} + 
+static inline struct rb_entry *rb_next(struct rb_entry *iter) +{ + if (iter->right) return rb_head(iter->right); + while (iter->parent && iter->parent->right == iter) iter = iter->parent; + return iter->parent; +} + +static inline struct rb_entry *rb_postorder_head(struct rb_entry *iter) +{ + if (!iter) return NULL; + + for (;;) { + while (iter->left) iter = iter->left; + if (!iter->right) return iter; + iter = iter->right; + } +} + +static inline struct rb_entry *rb_postorder_next(struct rb_entry *iter) +{ + if (!iter->parent) return NULL; + if (iter == iter->parent->right || !iter->parent->right) return iter->parent; + return rb_postorder_head(iter->parent->right); +} + +/* iterate through the tree */ +#define RB_FOR_EACH(cursor, tree) \ + for ((cursor) = rb_head((tree)->root); (cursor); (cursor) = rb_next(cursor)) + +/* iterate through the tree using a tree entry */ +#define RB_FOR_EACH_ENTRY(elem, tree, type, field) \ + for ((elem) = RB_ENTRY_VALUE(rb_head((tree)->root), type, field); \ + (elem) != RB_ENTRY_VALUE(0, type, field); \ + (elem) = RB_ENTRY_VALUE(rb_next(&(elem)->field), type, field)) + +/* iterate through the tree using postorder, making it safe to free the entry */ +#define RB_FOR_EACH_DESTRUCTOR(cursor, cursor2, tree) \ + for ((cursor) = rb_postorder_head((tree)->root); \ + (cursor) && (((cursor2) = rb_postorder_next(cursor)) || 1); \ + (cursor) = (cursor2)) + +/* iterate through the tree using a tree entry and postorder, making it safe to free the entry */ +#define RB_FOR_EACH_ENTRY_DESTRUCTOR(elem, elem2, tree, type, field) \ + for ((elem) = RB_ENTRY_VALUE(rb_postorder_head((tree)->root), type, field); \ + (elem) != RB_ENTRY_VALUE(0, type, field) \ + && (((elem2) = RB_ENTRY_VALUE(rb_postorder_next(&(elem)->field), type, field)) || 1); \ + (elem) = (elem2)) + + +static inline void rb_postorder(struct rb_tree *tree, rb_traverse_func *callback, void *context) +{ + struct rb_entry *iter, *next; + RB_FOR_EACH_DESTRUCTOR(iter, next, tree) 
callback(iter, context); +} + +static inline void rb_init(struct rb_tree *tree, rb_compare_func compare) +{ + tree->compare = compare; + tree->root = NULL; +} + +static inline void rb_for_each_entry(struct rb_tree *tree, rb_traverse_func *callback, void *context) +{ + struct rb_entry *iter; + RB_FOR_EACH(iter, tree) callback(iter, context); +} + +static inline void rb_clear(struct rb_tree *tree, rb_traverse_func *callback, void *context) +{ + /* Note that we use postorder here because the callback will likely free the entry. */ + if (callback) rb_postorder(tree, callback, context); + tree->root = NULL; +} + +static inline void rb_destroy(struct rb_tree *tree, rb_traverse_func *callback, void *context) +{ + rb_clear(tree, callback, context); +} + +static inline struct rb_entry *rb_get(const struct rb_tree *tree, const void *key) +{ + struct rb_entry *entry = tree->root; + while (entry) + { + int c = tree->compare(key, entry); + if (!c) return entry; + entry = c < 0 ? entry->left : entry->right; + } + return NULL; +} + +static inline int rb_put(struct rb_tree *tree, const void *key, struct rb_entry *entry) +{ + struct rb_entry **iter = &tree->root, *parent = tree->root; + + while (*iter) + { + int c; + + parent = *iter; + c = tree->compare(key, parent); + if (!c) return -1; + else if (c < 0) iter = &parent->left; + else iter = &parent->right; + } + + entry->flags = RB_FLAG_RED; + entry->parent = parent; + entry->left = NULL; + entry->right = NULL; + *iter = entry; + + while (rb_is_red(entry->parent)) + { + if (entry->parent == entry->parent->parent->left) + { + if (rb_is_red(entry->parent->parent->right)) + { + rb_flip_color(entry->parent->parent); + entry = entry->parent->parent; + } + else + { + if (entry == entry->parent->right) + { + entry = entry->parent; + rb_rotate_left(tree, entry); + } + entry->parent->flags &= ~RB_FLAG_RED; + entry->parent->parent->flags |= RB_FLAG_RED; + rb_rotate_right(tree, entry->parent->parent); + } + } + else + { + if 
(rb_is_red(entry->parent->parent->left)) + { + rb_flip_color(entry->parent->parent); + entry = entry->parent->parent; + } + else + { + if (entry == entry->parent->left) + { + entry = entry->parent; + rb_rotate_right(tree, entry); + } + entry->parent->flags &= ~RB_FLAG_RED; + entry->parent->parent->flags |= RB_FLAG_RED; + rb_rotate_left(tree, entry->parent->parent); + } + } + } + + tree->root->flags &= ~RB_FLAG_RED; + + return 0; +} + +static inline void rb_remove(struct rb_tree *tree, struct rb_entry *entry) +{ + struct rb_entry *iter, *child, *parent, *w; + int need_fixup; + + if (entry->right && entry->left) + for(iter = entry->right; iter->left; iter = iter->left); + else + iter = entry; + + child = iter->left ? iter->left : iter->right; + + if (!iter->parent) + tree->root = child; + else if (iter == iter->parent->left) + iter->parent->left = child; + else + iter->parent->right = child; + + if (child) child->parent = iter->parent; + parent = iter->parent; + + need_fixup = !rb_is_red(iter); + + if (entry != iter) + { + *iter = *entry; + if (!iter->parent) + tree->root = iter; + else if (entry == iter->parent->left) + iter->parent->left = iter; + else + iter->parent->right = iter; + + if (iter->right) iter->right->parent = iter; + if (iter->left) iter->left->parent = iter; + if (parent == entry) parent = iter; + } + + if (need_fixup) + { + while (parent && !rb_is_red(child)) + { + if (child == parent->left) + { + w = parent->right; + if (rb_is_red(w)) + { + w->flags &= ~RB_FLAG_RED; + parent->flags |= RB_FLAG_RED; + rb_rotate_left(tree, parent); + w = parent->right; + } + if (rb_is_red(w->left) || rb_is_red(w->right)) + { + if (!rb_is_red(w->right)) + { + w->left->flags &= ~RB_FLAG_RED; + w->flags |= RB_FLAG_RED; + rb_rotate_right(tree, w); + w = parent->right; + } + w->flags = (w->flags & ~RB_FLAG_RED) | (parent->flags & RB_FLAG_RED); + parent->flags &= ~RB_FLAG_RED; + if (w->right) + w->right->flags &= ~RB_FLAG_RED; + rb_rotate_left(tree, parent); + child = 
NULL; + break; + } + } + else + { + w = parent->left; + if (rb_is_red(w)) + { + w->flags &= ~RB_FLAG_RED; + parent->flags |= RB_FLAG_RED; + rb_rotate_right(tree, parent); + w = parent->left; + } + if (rb_is_red(w->left) || rb_is_red(w->right)) + { + if (!rb_is_red(w->left)) + { + w->right->flags &= ~RB_FLAG_RED; + w->flags |= RB_FLAG_RED; + rb_rotate_left(tree, w); + w = parent->left; + } + w->flags = (w->flags & ~RB_FLAG_RED) | (parent->flags & RB_FLAG_RED); + parent->flags &= ~RB_FLAG_RED; + if (w->left) + w->left->flags &= ~RB_FLAG_RED; + rb_rotate_right(tree, parent); + child = NULL; + break; + } + } + w->flags |= RB_FLAG_RED; + child = parent; + parent = child->parent; + } + if (child) child->flags &= ~RB_FLAG_RED; + } + + if (tree->root) tree->root->flags &= ~RB_FLAG_RED; +} + +static inline void rb_remove_key(struct rb_tree *tree, const void *key) +{ + struct rb_entry *entry = rb_get(tree, key); + if (entry) rb_remove(tree, entry); +} + +#endif /* __WINE_WINE_RBTREE_H */ diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 3cf0422596c..1ac23b4a085 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -84,7 +84,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) { #ifdef _MSC_VER return __popcnt(v); -#elif defined(__MINGW32__) +#elif defined(HAVE_BUILTIN_POPCOUNT) return __builtin_popcount(v); #else v -= (v >> 1) & 0x55555555; @@ -249,6 +249,7 @@ static inline LONG InterlockedDecrement(LONG volatile *x) # else # error "InterlockedDecrement() not implemented for this platform" # endif + #endif /* _WIN32 */ static inline void vkd3d_parse_version(const char *version, int *major, int *minor) diff --git a/libs/vkd3d/include/private/vkd3d_debug.h b/libs/vkd3d/include/private/vkd3d_debug.h index 4f6d43af12f..6708cad344f 100644 --- a/libs/vkd3d/include/private/vkd3d_debug.h +++ b/libs/vkd3d/include/private/vkd3d_debug.h @@ -91,7 +91,7 @@ const char 
*debugstr_w(const WCHAR *wstr, size_t wchar_size); #define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) -#define VKD3D_DEBUG_ENV_NAME(name) const char *vkd3d_dbg_env_name = name +#define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name static inline const char *debugstr_guid(const GUID *guid) { diff --git a/libs/vkd3d/include/private/vkd3d_test.h b/libs/vkd3d/include/private/vkd3d_test.h new file mode 100644 index 00000000000..081443c4fa6 --- /dev/null +++ b/libs/vkd3d/include/private/vkd3d_test.h @@ -0,0 +1,432 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_TEST_H +#define __VKD3D_TEST_H + +#include "vkd3d_common.h" +#include <assert.h> +#include <inttypes.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +extern const char *vkd3d_test_name; +extern const char *vkd3d_test_platform; + +static void vkd3d_test_start_todo(bool is_todo); +static int vkd3d_test_loop_todo(void); +static void vkd3d_test_end_todo(void); + +#define START_TEST(name) \ + const char *vkd3d_test_name = #name; \ + static void vkd3d_test_main(int argc, char **argv) + +/* + * Use assert_that() for conditions that should always be true. + * todo_if() and bug_if() do not influence assert_that(). 
+ */ +#define assert_that assert_that_(__LINE__) + +#define ok ok_(__LINE__) + +#define skip skip_(__LINE__) + +#define trace trace_(__LINE__) + +#define assert_that_(line) \ + do { \ + unsigned int vkd3d_line = line; \ + VKD3D_TEST_ASSERT_THAT + +#define VKD3D_TEST_ASSERT_THAT(...) \ + vkd3d_test_assert_that(vkd3d_line, __VA_ARGS__); } while (0) + +#define ok_(line) \ + do { \ + unsigned int vkd3d_line = line; \ + VKD3D_TEST_OK + +#define VKD3D_TEST_OK(...) \ + vkd3d_test_ok(vkd3d_line, __VA_ARGS__); } while (0) + +#define todo_(line) \ + do { \ + unsigned int vkd3d_line = line; \ + VKD3D_TEST_TODO + +#define VKD3D_TEST_TODO(...) \ + vkd3d_test_todo(vkd3d_line, __VA_ARGS__); } while (0) + +#define skip_(line) \ + do { \ + unsigned int vkd3d_line = line; \ + VKD3D_TEST_SKIP + +#define VKD3D_TEST_SKIP(...) \ + vkd3d_test_skip(vkd3d_line, __VA_ARGS__); } while (0) + +#define trace_(line) \ + do { \ + unsigned int vkd3d_line = line; \ + VKD3D_TEST_TRACE + +#define VKD3D_TEST_TRACE(...) \ + vkd3d_test_trace(vkd3d_line, __VA_ARGS__); } while (0) + +#define todo_if(is_todo) \ + for (vkd3d_test_start_todo(is_todo); vkd3d_test_loop_todo(); vkd3d_test_end_todo()) + +#define bug_if(is_bug) \ + for (vkd3d_test_start_bug(is_bug); vkd3d_test_loop_bug(); vkd3d_test_end_bug()) + +#define todo todo_if(true) + +struct vkd3d_test_state +{ + LONG success_count; + LONG failure_count; + LONG skip_count; + LONG todo_count; + LONG todo_success_count; + LONG bug_count; + + unsigned int debug_level; + + unsigned int todo_level; + bool todo_do_loop; + + unsigned int bug_level; + bool bug_do_loop; + bool bug_enabled; + + const char *test_name_filter; + char context[8][128]; + unsigned int context_count; +}; +extern struct vkd3d_test_state vkd3d_test_state; + +static bool +vkd3d_test_platform_is_windows(void) +{ + return !strcmp(vkd3d_test_platform, "windows"); +} + +static inline bool +broken(bool condition) +{ + return condition && vkd3d_test_platform_is_windows(); +} + +static void 
vkd3d_test_printf(unsigned int line, const char *msg) +{ + unsigned int i; + + printf("%s:%u: ", vkd3d_test_name, line); + for (i = 0; i < vkd3d_test_state.context_count; ++i) + printf("%s: ", vkd3d_test_state.context[i]); + printf("%s", msg); +} + +static void +vkd3d_test_check_assert_that(unsigned int line, bool result, const char *fmt, va_list args) +{ + if (result) + { + InterlockedIncrement(&vkd3d_test_state.success_count); + if (vkd3d_test_state.debug_level > 1) + vkd3d_test_printf(line, "Test succeeded.\n"); + } + else + { + InterlockedIncrement(&vkd3d_test_state.failure_count); + vkd3d_test_printf(line, "Test failed: "); + vprintf(fmt, args); + } +} + +static void VKD3D_PRINTF_FUNC(3, 4) VKD3D_UNUSED +vkd3d_test_assert_that(unsigned int line, bool result, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vkd3d_test_check_assert_that(line, result, fmt, args); + va_end(args); +} + +static void +vkd3d_test_check_ok(unsigned int line, bool result, const char *fmt, va_list args) +{ + bool is_todo = vkd3d_test_state.todo_level && !vkd3d_test_platform_is_windows(); + bool is_bug = vkd3d_test_state.bug_level && !vkd3d_test_platform_is_windows(); + + if (is_bug && vkd3d_test_state.bug_enabled) + { + InterlockedIncrement(&vkd3d_test_state.bug_count); + if (is_todo) + result = !result; + if (result) + vkd3d_test_printf(line, "Fixed bug: "); + else + vkd3d_test_printf(line, "Bug: "); + vprintf(fmt, args); + } + else if (is_todo) + { + if (result) + { + InterlockedIncrement(&vkd3d_test_state.todo_success_count); + vkd3d_test_printf(line, "Todo succeeded: "); + } + else + { + InterlockedIncrement(&vkd3d_test_state.todo_count); + vkd3d_test_printf(line, "Todo: "); + } + vprintf(fmt, args); + } + else + { + vkd3d_test_check_assert_that(line, result, fmt, args); + } +} + +static void VKD3D_PRINTF_FUNC(3, 4) VKD3D_UNUSED +vkd3d_test_ok(unsigned int line, bool result, const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + vkd3d_test_check_ok(line, result, fmt, args); + va_end(args); +} + +static void VKD3D_PRINTF_FUNC(2, 3) VKD3D_UNUSED +vkd3d_test_skip(unsigned int line, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + vkd3d_test_printf(line, "Test skipped: "); + vprintf(fmt, args); + va_end(args); + InterlockedIncrement(&vkd3d_test_state.skip_count); +} + +static void VKD3D_PRINTF_FUNC(2, 3) VKD3D_UNUSED +vkd3d_test_trace(unsigned int line, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + vkd3d_test_printf(line, ""); + vprintf(fmt, args); + va_end(args); +} + +static void VKD3D_PRINTF_FUNC(1, 2) VKD3D_UNUSED +vkd3d_test_debug(const char *fmt, ...) +{ + char buffer[512]; + va_list args; + int size; + + size = snprintf(buffer, sizeof(buffer), "%s: ", vkd3d_test_name); + if (0 < size && size < sizeof(buffer)) + { + va_start(args, fmt); + vsnprintf(buffer + size, sizeof(buffer) - size, fmt, args); + va_end(args); + } + buffer[sizeof(buffer) - 1] = '\0'; + +#ifdef _WIN32 + OutputDebugStringA(buffer); +#endif + + if (vkd3d_test_state.debug_level > 0) + printf("%s\n", buffer); +} + +#ifndef VKD3D_TEST_NO_DEFS +const char *vkd3d_test_platform = "other"; +struct vkd3d_test_state vkd3d_test_state; + +static void vkd3d_test_main(int argc, char **argv); + +int main(int argc, char **argv) +{ + const char *test_filter = getenv("VKD3D_TEST_FILTER"); + const char *debug_level = getenv("VKD3D_TEST_DEBUG"); + char *test_platform = getenv("VKD3D_TEST_PLATFORM"); + const char *bug = getenv("VKD3D_TEST_BUG"); + + memset(&vkd3d_test_state, 0, sizeof(vkd3d_test_state)); + vkd3d_test_state.debug_level = debug_level ? atoi(debug_level) : 0; + vkd3d_test_state.bug_enabled = bug ? 
atoi(bug) : true; + vkd3d_test_state.test_name_filter = test_filter; + + if (test_platform) + { + test_platform = strdup(test_platform); + vkd3d_test_platform = test_platform; + } + + if (vkd3d_test_state.debug_level > 1) + printf("Test platform: '%s'.\n", vkd3d_test_platform); + + vkd3d_test_main(argc, argv); + + printf("%s: %lu tests executed (%lu failures, %lu skipped, %lu todo, %lu bugs).\n", + vkd3d_test_name, + (unsigned long)(vkd3d_test_state.success_count + + vkd3d_test_state.failure_count + vkd3d_test_state.todo_count + + vkd3d_test_state.todo_success_count), + (unsigned long)(vkd3d_test_state.failure_count + + vkd3d_test_state.todo_success_count), + (unsigned long)vkd3d_test_state.skip_count, + (unsigned long)vkd3d_test_state.todo_count, + (unsigned long)vkd3d_test_state.bug_count); + + if (test_platform) + free(test_platform); + + return vkd3d_test_state.failure_count || vkd3d_test_state.todo_success_count; +} + +#ifdef _WIN32 +static char *vkd3d_test_strdupWtoA(WCHAR *str) +{ + char *out; + int len; + + if (!(len = WideCharToMultiByte(CP_ACP, 0, str, -1, NULL, 0, NULL, NULL))) + return NULL; + if (!(out = malloc(len))) + return NULL; + WideCharToMultiByte(CP_ACP, 0, str, -1, out, len, NULL, NULL); + + return out; +} + +static bool running_under_wine(void) +{ + HMODULE module = GetModuleHandleA("ntdll.dll"); + return module && GetProcAddress(module, "wine_server_call"); +} + +int wmain(int argc, WCHAR **wargv) +{ + char **argv; + int i, ret; + + argv = malloc(argc * sizeof(*argv)); + assert(argv); + for (i = 0; i < argc; ++i) + { + if (!(argv[i] = vkd3d_test_strdupWtoA(wargv[i]))) + break; + } + assert(i == argc); + + vkd3d_test_platform = running_under_wine() ? 
"wine" : "windows"; + + ret = main(argc, argv); + + for (i = 0; i < argc; ++i) + free(argv[i]); + free(argv); + + return ret; +} +#endif /* _WIN32 */ +#endif /* VKD3D_TEST_NO_DEFS */ + +typedef void (*vkd3d_test_pfn)(void); + +static inline void vkd3d_run_test(const char *name, vkd3d_test_pfn test_pfn) +{ + if (vkd3d_test_state.test_name_filter && !strstr(name, vkd3d_test_state.test_name_filter)) + return; + + vkd3d_test_debug("%s", name); + test_pfn(); +} + +static inline void vkd3d_test_start_todo(bool is_todo) +{ + vkd3d_test_state.todo_level = (vkd3d_test_state.todo_level << 1) | is_todo; + vkd3d_test_state.todo_do_loop = true; +} + +static inline int vkd3d_test_loop_todo(void) +{ + bool do_loop = vkd3d_test_state.todo_do_loop; + vkd3d_test_state.todo_do_loop = false; + return do_loop; +} + +static inline void vkd3d_test_end_todo(void) +{ + vkd3d_test_state.todo_level >>= 1; +} + +static inline void vkd3d_test_start_bug(bool is_bug) +{ + vkd3d_test_state.bug_level = (vkd3d_test_state.bug_level << 1) | is_bug; + vkd3d_test_state.bug_do_loop = true; +} + +static inline int vkd3d_test_loop_bug(void) +{ + bool do_loop = vkd3d_test_state.bug_do_loop; + vkd3d_test_state.bug_do_loop = false; + return do_loop; +} + +static inline void vkd3d_test_end_bug(void) +{ + vkd3d_test_state.bug_level >>= 1; +} + +static inline void vkd3d_test_push_context(const char *fmt, ...) 
+{ + va_list args; + + if (vkd3d_test_state.context_count < ARRAY_SIZE(vkd3d_test_state.context)) + { + va_start(args, fmt); + vsnprintf(vkd3d_test_state.context[vkd3d_test_state.context_count], + sizeof(vkd3d_test_state.context), fmt, args); + va_end(args); + vkd3d_test_state.context[vkd3d_test_state.context_count][sizeof(vkd3d_test_state.context[0]) - 1] = '\0'; + } + ++vkd3d_test_state.context_count; +} + +static inline void vkd3d_test_pop_context(void) +{ + if (vkd3d_test_state.context_count) + --vkd3d_test_state.context_count; +} + +#define run_test(test_pfn) \ + vkd3d_run_test(#test_pfn, test_pfn) + +#endif /* __VKD3D_TEST_H */ diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h index ff2b15c51dc..72ed3ced671 100644 --- a/libs/vkd3d/include/vkd3d.h +++ b/libs/vkd3d/include/vkd3d.h @@ -76,6 +76,7 @@ enum vkd3d_api_version VKD3D_API_VERSION_1_5, VKD3D_API_VERSION_1_6, VKD3D_API_VERSION_1_7, + VKD3D_API_VERSION_1_8, VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION), }; diff --git a/libs/vkd3d/include/vkd3d_d3d9types.h b/libs/vkd3d/include/vkd3d_d3d9types.h new file mode 100644 index 00000000000..75d0461409d --- /dev/null +++ b/libs/vkd3d/include/vkd3d_d3d9types.h @@ -0,0 +1,237 @@ +/* + * Copyright 2002-2003 Jason Edmeades + * Copyright 2002-2003 Raphael Junqueira + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_D3D9TYPES_H +#define __VKD3D_D3D9TYPES_H +#ifndef _d3d9TYPES_H_ + +#ifndef MAKEFOURCC +#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ + ((DWORD)(BYTE)(ch0) | ((DWORD)(BYTE)(ch1) << 8) | \ + ((DWORD)(BYTE)(ch2) << 16) | ((DWORD)(BYTE)(ch3) << 24 )) +#endif + +#define D3DSI_INSTLENGTH_SHIFT 24 + +#define D3DSP_DCL_USAGE_SHIFT 0 +#define D3DSP_DCL_USAGEINDEX_SHIFT 16 +#define D3DSP_DSTMOD_SHIFT 20 + +#define D3DSP_SRCMOD_SHIFT 24 + +#define D3DSP_REGTYPE_SHIFT 28 +#define D3DSP_REGTYPE_SHIFT2 8 +#define D3DSP_REGTYPE_MASK (0x7 << D3DSP_REGTYPE_SHIFT) +#define D3DSP_REGTYPE_MASK2 0x00001800 + +#define D3DSP_WRITEMASK_0 0x00010000 +#define D3DSP_WRITEMASK_1 0x00020000 +#define D3DSP_WRITEMASK_2 0x00040000 +#define D3DSP_WRITEMASK_3 0x00080000 +#define D3DSP_WRITEMASK_ALL 0x000f0000 + +#define D3DPS_VERSION(major, minor) (0xffff0000 | ((major) << 8) | (minor)) +#define D3DVS_VERSION(major, minor) (0xfffe0000 | ((major) << 8) | (minor)) + +typedef enum _D3DDECLUSAGE +{ + D3DDECLUSAGE_POSITION = 0x0, + D3DDECLUSAGE_BLENDWEIGHT = 0x1, + D3DDECLUSAGE_BLENDINDICES = 0x2, + D3DDECLUSAGE_NORMAL = 0x3, + D3DDECLUSAGE_PSIZE = 0x4, + D3DDECLUSAGE_TEXCOORD = 0x5, + D3DDECLUSAGE_TANGENT = 0x6, + D3DDECLUSAGE_BINORMAL = 0x7, + D3DDECLUSAGE_TESSFACTOR = 0x8, + D3DDECLUSAGE_POSITIONT = 0x9, + D3DDECLUSAGE_COLOR = 0xa, + D3DDECLUSAGE_FOG = 0xb, + D3DDECLUSAGE_DEPTH = 0xc, + D3DDECLUSAGE_SAMPLE = 0xd, +} D3DDECLUSAGE; + +typedef enum _D3DSHADER_INSTRUCTION_OPCODE_TYPE +{ + D3DSIO_NOP = 0x00, + D3DSIO_MOV = 0x01, + D3DSIO_ADD = 0x02, + D3DSIO_SUB = 0x03, + D3DSIO_MAD = 0x04, + D3DSIO_MUL = 0x05, + D3DSIO_RCP = 0x06, + D3DSIO_RSQ = 0x07, + D3DSIO_DP3 = 0x08, + D3DSIO_DP4 = 0x09, + D3DSIO_MIN = 0x0a, + D3DSIO_MAX = 0x0b, + D3DSIO_SLT = 0x0c, + D3DSIO_SGE 
= 0x0d, + D3DSIO_EXP = 0x0e, + D3DSIO_LOG = 0x0f, + D3DSIO_LIT = 0x10, + D3DSIO_DST = 0x11, + D3DSIO_LRP = 0x12, + D3DSIO_FRC = 0x13, + D3DSIO_M4x4 = 0x14, + D3DSIO_M4x3 = 0x15, + D3DSIO_M3x4 = 0x16, + D3DSIO_M3x3 = 0x17, + D3DSIO_M3x2 = 0x18, + D3DSIO_CALL = 0x19, + D3DSIO_CALLNZ = 0x1a, + D3DSIO_LOOP = 0x1b, + D3DSIO_RET = 0x1c, + D3DSIO_ENDLOOP = 0x1d, + D3DSIO_LABEL = 0x1e, + D3DSIO_DCL = 0x1f, + D3DSIO_POW = 0x20, + D3DSIO_CRS = 0x21, + D3DSIO_SGN = 0x22, + D3DSIO_ABS = 0x23, + D3DSIO_NRM = 0x24, + D3DSIO_SINCOS = 0x25, + D3DSIO_REP = 0x26, + D3DSIO_ENDREP = 0x27, + D3DSIO_IF = 0x28, + D3DSIO_IFC = 0x29, + D3DSIO_ELSE = 0x2a, + D3DSIO_ENDIF = 0x2b, + D3DSIO_BREAK = 0x2c, + D3DSIO_BREAKC = 0x2d, + D3DSIO_MOVA = 0x2e, + D3DSIO_DEFB = 0x2f, + D3DSIO_DEFI = 0x30, + + D3DSIO_TEXCOORD = 0x40, + D3DSIO_TEXKILL = 0x41, + D3DSIO_TEX = 0x42, + D3DSIO_TEXBEM = 0x43, + D3DSIO_TEXBEML = 0x44, + D3DSIO_TEXREG2AR = 0x45, + D3DSIO_TEXREG2GB = 0x46, + D3DSIO_TEXM3x2PAD = 0x47, + D3DSIO_TEXM3x2TEX = 0x48, + D3DSIO_TEXM3x3PAD = 0x49, + D3DSIO_TEXM3x3TEX = 0x4a, + D3DSIO_TEXM3x3DIFF = 0x4b, + D3DSIO_TEXM3x3SPEC = 0x4c, + D3DSIO_TEXM3x3VSPEC = 0x4d, + D3DSIO_EXPP = 0x4e, + D3DSIO_LOGP = 0x4f, + D3DSIO_CND = 0x50, + D3DSIO_DEF = 0x51, + D3DSIO_TEXREG2RGB = 0x52, + D3DSIO_TEXDP3TEX = 0x53, + D3DSIO_TEXM3x2DEPTH = 0x54, + D3DSIO_TEXDP3 = 0x55, + D3DSIO_TEXM3x3 = 0x56, + D3DSIO_TEXDEPTH = 0x57, + D3DSIO_CMP = 0x58, + D3DSIO_BEM = 0x59, + D3DSIO_DP2ADD = 0x5a, + D3DSIO_DSX = 0x5b, + D3DSIO_DSY = 0x5c, + D3DSIO_TEXLDD = 0x5d, + D3DSIO_SETP = 0x5e, + D3DSIO_TEXLDL = 0x5f, + D3DSIO_BREAKP = 0x60, + + D3DSIO_PHASE = 0xfffd, + D3DSIO_COMMENT = 0xfffe, + D3DSIO_END = 0xffff, + + D3DSIO_FORCE_DWORD = 0x7fffffff, +} D3DSHADER_INSTRUCTION_OPCODE_TYPE; + +typedef enum _D3DSHADER_PARAM_DSTMOD_TYPE +{ + D3DSPDM_NONE = 0 << D3DSP_DSTMOD_SHIFT, + D3DSPDM_SATURATE = 1 << D3DSP_DSTMOD_SHIFT, + D3DSPDM_PARTIALPRECISION = 2 << D3DSP_DSTMOD_SHIFT, + D3DSPDM_MSAMPCENTROID = 4 << D3DSP_DSTMOD_SHIFT, + + 
D3DSPDM_FORCE_DWORD = 0x7fffffff, +} D3DSHADER_PARAM_DSTMOD_TYPE; + +typedef enum _D3DSHADER_PARAM_REGISTER_TYPE +{ + D3DSPR_TEMP = 0x00, + D3DSPR_INPUT = 0x01, + D3DSPR_CONST = 0x02, + D3DSPR_ADDR = 0x03, + D3DSPR_TEXTURE = 0x03, + D3DSPR_RASTOUT = 0x04, + D3DSPR_ATTROUT = 0x05, + D3DSPR_TEXCRDOUT = 0x06, + D3DSPR_OUTPUT = 0x06, + D3DSPR_CONSTINT = 0x07, + D3DSPR_COLOROUT = 0x08, + D3DSPR_DEPTHOUT = 0x09, + D3DSPR_SAMPLER = 0x0a, + D3DSPR_CONST2 = 0x0b, + D3DSPR_CONST3 = 0x0c, + D3DSPR_CONST4 = 0x0d, + D3DSPR_CONSTBOOL = 0x0e, + D3DSPR_LOOP = 0x0f, + D3DSPR_TEMPFLOAT16 = 0x10, + D3DSPR_MISCTYPE = 0x11, + D3DSPR_LABEL = 0x12, + D3DSPR_PREDICATE = 0x13, + + D3DSPR_FORCE_DWORD = 0x7fffffff, +} D3DSHADER_PARAM_REGISTER_TYPE; + +typedef enum _D3DSHADER_PARAM_SRCMOD_TYPE +{ + D3DSPSM_NONE = 0x0 << D3DSP_SRCMOD_SHIFT, + D3DSPSM_NEG = 0x1 << D3DSP_SRCMOD_SHIFT, + D3DSPSM_BIAS = 0x2 << D3DSP_SRCMOD_SHIFT, + D3DSPSM_BIASNEG = 0x3 << D3DSP_SRCMOD_SHIFT, + D3DSPSM_SIGN = 0x4 << D3DSP_SRCMOD_SHIFT, + D3DSPSM_SIGNNEG = 0x5 << D3DSP_SRCMOD_SHIFT, + D3DSPSM_COMP = 0x6 << D3DSP_SRCMOD_SHIFT, + D3DSPSM_X2 = 0x7 << D3DSP_SRCMOD_SHIFT, + D3DSPSM_X2NEG = 0x8 << D3DSP_SRCMOD_SHIFT, + D3DSPSM_DZ = 0x9 << D3DSP_SRCMOD_SHIFT, + D3DSPSM_DW = 0xa << D3DSP_SRCMOD_SHIFT, + D3DSPSM_ABS = 0xb << D3DSP_SRCMOD_SHIFT, + D3DSPSM_ABSNEG = 0xc << D3DSP_SRCMOD_SHIFT, + D3DSPSM_NOT = 0xd << D3DSP_SRCMOD_SHIFT, + + D3DSPSM_FORCE_DWORD = 0x7fffffff, +} D3DSHADER_PARAM_SRCMOD_TYPE; + +typedef enum _D3DSHADER_MISCTYPE_OFFSETS +{ + D3DSMO_POSITION = 0x0, + D3DSMO_FACE = 0x1, +} D3DSHADER_MISCTYPE_OFFSETS; + +typedef enum _D3DVS_RASTOUT_OFFSETS +{ + D3DSRO_POSITION = 0x0, + D3DSRO_FOG = 0x1, + D3DSRO_POINT_SIZE = 0x2, + + D3DSRO_FORCE_DWORD = 0x7fffffff, +} D3DVS_RASTOUT_OFFSETS; + +#endif /* _d3d9TYPES_H_ */ +#endif /* __VKD3D_D3D9TYPES_H */ diff --git a/libs/vkd3d/include/vkd3d_d3dcompiler.h b/libs/vkd3d/include/vkd3d_d3dcompiler.h new file mode 100644 index 00000000000..c934835dc0a --- /dev/null +++ 
b/libs/vkd3d/include/vkd3d_d3dcompiler.h @@ -0,0 +1,74 @@ +/* + * Copyright 2010 Matteo Bruni for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_D3DCOMPILER_H +#define __VKD3D_D3DCOMPILER_H +#ifndef __D3DCOMPILER_H__ + +#define D3DCOMPILE_DEBUG 0x00000001 +#define D3DCOMPILE_SKIP_VALIDATION 0x00000002 +#define D3DCOMPILE_SKIP_OPTIMIZATION 0x00000004 +#define D3DCOMPILE_PACK_MATRIX_ROW_MAJOR 0x00000008 +#define D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR 0x00000010 +#define D3DCOMPILE_PARTIAL_PRECISION 0x00000020 +#define D3DCOMPILE_FORCE_VS_SOFTWARE_NO_OPT 0x00000040 +#define D3DCOMPILE_FORCE_PS_SOFTWARE_NO_OPT 0x00000080 +#define D3DCOMPILE_NO_PRESHADER 0x00000100 +#define D3DCOMPILE_AVOID_FLOW_CONTROL 0x00000200 +#define D3DCOMPILE_PREFER_FLOW_CONTROL 0x00000400 +#define D3DCOMPILE_ENABLE_STRICTNESS 0x00000800 +#define D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY 0x00001000 +#define D3DCOMPILE_IEEE_STRICTNESS 0x00002000 +#define D3DCOMPILE_OPTIMIZATION_LEVEL0 0x00004000 +#define D3DCOMPILE_OPTIMIZATION_LEVEL1 0x00000000 +#define D3DCOMPILE_OPTIMIZATION_LEVEL2 0x0000c000 +#define D3DCOMPILE_OPTIMIZATION_LEVEL3 0x00008000 +#define D3DCOMPILE_RESERVED16 0x00010000 +#define D3DCOMPILE_RESERVED17 0x00020000 +#define D3DCOMPILE_WARNINGS_ARE_ERRORS 
0x00040000 +#define D3DCOMPILE_RESOURCES_MAY_ALIAS 0x00080000 +#define D3DCOMPILE_ENABLE_UNBOUNDED_DESCRIPTOR_TABLES 0x00100000 +#define D3DCOMPILE_ALL_RESOURCES_BOUND 0x00200000 +#define D3DCOMPILE_DEBUG_NAME_FOR_SOURCE 0x00400000 +#define D3DCOMPILE_DEBUG_NAME_FOR_BINARY 0x00800000 + +#define D3DCOMPILE_EFFECT_CHILD_EFFECT 0x00000001 +#define D3DCOMPILE_EFFECT_ALLOW_SLOW_OPS 0x00000002 + +#define D3DCOMPILE_FLAGS2_FORCE_ROOT_SIGNATURE_LATEST 0x00000000 +#define D3DCOMPILE_FLAGS2_FORCE_ROOT_SIGNATURE_1_0 0x00000010 +#define D3DCOMPILE_FLAGS2_FORCE_ROOT_SIGNATURE_1_1 0x00000020 + +#define D3DCOMPILE_SECDATA_MERGE_UAV_SLOTS 0x00000001 +#define D3DCOMPILE_SECDATA_PRESERVE_TEMPLATE_SLOTS 0x00000002 +#define D3DCOMPILE_SECDATA_REQUIRE_TEMPLATE_MATCH 0x00000004 + +HRESULT WINAPI D3DCompile(const void *data, SIZE_T data_size, const char *filename, + const D3D_SHADER_MACRO *macros, ID3DInclude *include, const char *entrypoint, + const char *profile, UINT flags, UINT effect_flags, ID3DBlob **shader, ID3DBlob **error_messages); +HRESULT WINAPI D3DCompile2(const void *data, SIZE_T data_size, const char *filename, + const D3D_SHADER_MACRO *macros, ID3DInclude *include, const char *entrypoint, + const char *profile, UINT flags, UINT effect_flags, UINT secondary_flags, + const void *secondary_data, SIZE_T secondary_data_size, ID3DBlob **shader, + ID3DBlob **error_messages); +HRESULT WINAPI D3DCreateBlob(SIZE_T size, ID3DBlob **blob); +HRESULT WINAPI D3DPreprocess(const void *data, SIZE_T size, const char *filename, const D3D_SHADER_MACRO *macros, + ID3DInclude *include, ID3DBlob **shader, ID3DBlob **error_messages); + +#endif /* __D3DCOMPILER_H__ */ +#endif /* __VKD3D_D3DCOMPILER_H */ diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index 859b8c79792..274241546ea 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -49,6 +49,7 @@ enum vkd3d_shader_api_version VKD3D_SHADER_API_VERSION_1_5, 
VKD3D_SHADER_API_VERSION_1_6, VKD3D_SHADER_API_VERSION_1_7, + VKD3D_SHADER_API_VERSION_1_8, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION), }; diff --git a/libs/vkd3d/include/vkd3d_utils.h b/libs/vkd3d/include/vkd3d_utils.h new file mode 100644 index 00000000000..e8462563576 --- /dev/null +++ b/libs/vkd3d/include/vkd3d_utils.h @@ -0,0 +1,108 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_UTILS_H +#define __VKD3D_UTILS_H + +#include + +#ifndef VKD3D_UTILS_API_VERSION +#define VKD3D_UTILS_API_VERSION VKD3D_API_VERSION_1_0 +#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * \file vkd3d_utils.h + * + * This file contains definitions for the vkd3d-utils library. + * + * The vkd3d-utils library is a collections of routines to ease the + * porting of a Direct3D 12 application to vkd3d. 
+ * + * \since 1.0 + */ + +#define VKD3D_WAIT_OBJECT_0 (0) +#define VKD3D_WAIT_TIMEOUT (1) +#define VKD3D_WAIT_FAILED (~0u) +#define VKD3D_INFINITE (~0u) + +#ifdef LIBVKD3D_UTILS_SOURCE +# define VKD3D_UTILS_API VKD3D_EXPORT +#else +# define VKD3D_UTILS_API VKD3D_IMPORT +#endif + +/* 1.0 */ +VKD3D_UTILS_API HANDLE vkd3d_create_event(void); +VKD3D_UTILS_API HRESULT vkd3d_signal_event(HANDLE event); +VKD3D_UTILS_API unsigned int vkd3d_wait_event(HANDLE event, unsigned int milliseconds); +VKD3D_UTILS_API void vkd3d_destroy_event(HANDLE event); + +#define D3D12CreateDevice(a, b, c, d) D3D12CreateDeviceVKD3D(a, b, c, d, VKD3D_UTILS_API_VERSION) +VKD3D_UTILS_API HRESULT WINAPI D3D12CreateRootSignatureDeserializer( + const void *data, SIZE_T data_size, REFIID iid, void **deserializer); +VKD3D_UTILS_API HRESULT WINAPI D3D12GetDebugInterface(REFIID iid, void **debug); +VKD3D_UTILS_API HRESULT WINAPI D3D12SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC *desc, + D3D_ROOT_SIGNATURE_VERSION version, ID3DBlob **blob, ID3DBlob **error_blob); + +/* 1.2 */ +VKD3D_UTILS_API HRESULT WINAPI D3D12CreateDeviceVKD3D(IUnknown *adapter, D3D_FEATURE_LEVEL feature_level, + REFIID iid, void **device, enum vkd3d_api_version api_version); +VKD3D_UTILS_API HRESULT WINAPI D3D12CreateVersionedRootSignatureDeserializer(const void *data, + SIZE_T data_size, REFIID iid, void **deserializer); +VKD3D_UTILS_API HRESULT WINAPI D3D12SerializeVersionedRootSignature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc, + ID3DBlob **blob, ID3DBlob **error_blob); + +/* 1.3 */ +VKD3D_UTILS_API HRESULT WINAPI D3DCompile(const void *data, SIZE_T data_size, const char *filename, + const D3D_SHADER_MACRO *defines, ID3DInclude *include, const char *entrypoint, + const char *target, UINT flags, UINT effect_flags, ID3DBlob **shader, ID3DBlob **error_messages); +VKD3D_UTILS_API HRESULT WINAPI D3DCompile2(const void *data, SIZE_T data_size, const char *filename, + const D3D_SHADER_MACRO *defines, ID3DInclude 
*include, const char *entrypoint, + const char *target, UINT flags, UINT effect_flags, UINT secondary_flags, + const void *secondary_data, SIZE_T secondary_data_size, ID3DBlob **shader, + ID3DBlob **error_messages); +VKD3D_UTILS_API HRESULT WINAPI D3DCreateBlob(SIZE_T data_size, ID3DBlob **blob); +VKD3D_UTILS_API HRESULT WINAPI D3DPreprocess(const void *data, SIZE_T size, const char *filename, + const D3D_SHADER_MACRO *defines, ID3DInclude *include, + ID3DBlob **shader, ID3DBlob **error_messages); + +/** + * Set a callback to be called when vkd3d-utils outputs debug logging. + * + * If NULL, or if this function has not been called, libvkd3d-utils will print + * all enabled log output to stderr. + * + * Calling this function will also set the log callback for libvkd3d and + * libvkd3d-shader. + * + * \param callback Callback function to set. + * + * \since 1.4 + */ +VKD3D_UTILS_API void vkd3d_utils_set_log_callback(PFN_vkd3d_log callback); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __VKD3D_UTILS_H */ diff --git a/libs/vkd3d/include/vkd3d_windows.h b/libs/vkd3d/include/vkd3d_windows.h new file mode 100644 index 00000000000..002ff667cbc --- /dev/null +++ b/libs/vkd3d/include/vkd3d_windows.h @@ -0,0 +1,284 @@ +/* + * Copyright 2016 Józef Kucia for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_WINDOWS_H +#define __VKD3D_WINDOWS_H +#ifndef _INC_WINDOWS + +/* Nameless unions */ +#ifndef __C89_NAMELESS +# ifdef NONAMELESSUNION +# define __C89_NAMELESS +# define __C89_NAMELESSUNIONNAME u +# else +# define __C89_NAMELESS +# define __C89_NAMELESSUNIONNAME +# endif /* NONAMELESSUNION */ +#endif /* __C89_NAMELESS */ + +#if !defined(_WIN32) || defined(__WIDL__) + +# if !defined(__WIDL__) +# if !defined(VKD3D_WIN32_WCHAR) +# include +# endif +# include +# endif + +# ifdef __GNUC__ +# define DECLSPEC_ALIGN(x) __attribute__((aligned(x))) +# endif + +/* HRESULT */ +typedef int HRESULT; +# define SUCCEEDED(hr) ((HRESULT)(hr) >= 0) +# define FAILED(hr) ((HRESULT)(hr) < 0) + +# define _HRESULT_TYPEDEF_(x) ((HRESULT)x) + +# define S_OK _HRESULT_TYPEDEF_(0) +# define S_FALSE _HRESULT_TYPEDEF_(1) + +# define E_NOTIMPL _HRESULT_TYPEDEF_(0x80004001) +# define E_NOINTERFACE _HRESULT_TYPEDEF_(0x80004002) +# define E_POINTER _HRESULT_TYPEDEF_(0x80004003) +# define E_ABORT _HRESULT_TYPEDEF_(0x80004004) +# define E_FAIL _HRESULT_TYPEDEF_(0x80004005) +# define E_OUTOFMEMORY _HRESULT_TYPEDEF_(0x8007000E) +# define E_INVALIDARG _HRESULT_TYPEDEF_(0x80070057) + +# define DXGI_ERROR_NOT_FOUND _HRESULT_TYPEDEF_(0x887a0002) +# define DXGI_ERROR_MORE_DATA _HRESULT_TYPEDEF_(0x887a0003) + +# define D3DERR_INVALIDCALL _HRESULT_TYPEDEF_(0x8876086c) + +/* Basic types */ +typedef unsigned char BYTE; +typedef unsigned int DWORD; +typedef int INT; +typedef unsigned int UINT; +typedef int LONG; +typedef unsigned int ULONG; +typedef float FLOAT; +typedef LONG BOOL; + +/* Assuming LP64 model */ +typedef char INT8; +typedef unsigned char UINT8; +typedef short INT16; +typedef unsigned short UINT16; +typedef int INT32; +typedef unsigned int UINT32; +# if 
defined(__WIDL__) +typedef __int64 INT64; +typedef unsigned __int64 UINT64; +# else +typedef int64_t DECLSPEC_ALIGN(8) INT64; +typedef uint64_t DECLSPEC_ALIGN(8) UINT64; +# endif +typedef INT64 LONG64; +typedef long LONG_PTR; +typedef unsigned long ULONG_PTR; + +typedef ULONG_PTR SIZE_T; + +# ifdef VKD3D_WIN32_WCHAR +typedef unsigned short WCHAR; +# else +typedef wchar_t WCHAR; +# endif /* VKD3D_WIN32_WCHAR */ +typedef void *HANDLE; + +/* GUID */ +# ifdef __WIDL__ +typedef struct +{ + unsigned long Data1; + unsigned short Data2; + unsigned short Data3; + unsigned char Data4[8]; +} GUID; +# else +typedef struct _GUID +{ + unsigned int Data1; + unsigned short Data2; + unsigned short Data3; + unsigned char Data4[8]; +} GUID; +# endif + +typedef GUID IID; + +# ifdef INITGUID +# ifndef __cplusplus +# define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ + const GUID name DECLSPEC_HIDDEN; \ + const GUID name = \ + { l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 }} +# else +# define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ + EXTERN_C const GUID name DECLSPEC_HIDDEN; \ + EXTERN_C const GUID name = \ + { l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 }} +# endif +# else +# define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ + EXTERN_C const GUID name DECLSPEC_HIDDEN; +# endif /* INITGUID */ + +/* __uuidof emulation */ +#if defined(__cplusplus) && !defined(_MSC_VER) + +extern "C++" +{ + template const GUID &__vkd3d_uuidof(); +} + +# define __CRT_UUID_DECL(type, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ + extern "C++" \ + { \ + template<> inline const GUID &__vkd3d_uuidof() \ + { \ + static const IID __uuid_inst = {l, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}; \ + return __uuid_inst; \ + } \ + template<> inline const GUID &__vkd3d_uuidof() \ + { \ + return __vkd3d_uuidof(); \ + } \ + } + +# define __uuidof(type) __vkd3d_uuidof() +#else +# define __CRT_UUID_DECL(type, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) +#endif /* 
defined(__cplusplus) && !defined(_MSC_VER) */ + +typedef struct SECURITY_ATTRIBUTES SECURITY_ATTRIBUTES; +#endif /* !defined(_WIN32) || defined(__WIDL__) */ + + +#ifndef _WIN32 +# include +# include +# include + +# define COM_NO_WINDOWS_H + +# define FORCEINLINE inline + +# define CONTAINING_RECORD(address, type, field) \ + ((type *)((char *)(address) - offsetof(type, field))) + +# ifdef __x86_64__ +# define __stdcall __attribute__((ms_abi)) +# else +# if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 2)) || defined(__APPLE__) +# define __stdcall __attribute__((__stdcall__)) __attribute__((__force_align_arg_pointer__)) +# else +# define __stdcall __attribute__((__stdcall__)) +# endif +# endif + +# define WINAPI __stdcall +# define STDMETHODCALLTYPE __stdcall + +# ifdef __GNUC__ +# define DECLSPEC_SELECTANY __attribute__((weak)) +# endif + +/* Macros for COM interfaces */ +# define interface struct +# define BEGIN_INTERFACE +# define END_INTERFACE +# define MIDL_INTERFACE(x) struct + +# ifdef __cplusplus +# define EXTERN_C extern "C" +# else +# define EXTERN_C extern +# endif + +# define CONST_VTBL const + +# define TRUE 1 +# define FALSE 0 + +# if defined(__cplusplus) && !defined(CINTERFACE) +# define REFIID const IID & +# define REFGUID const GUID & +# else +# define REFIID const IID * const +# define REFGUID const GUID * const +# endif + +#if defined(__cplusplus) && !defined(CINTERFACE) +# define IsEqualGUID(guid1, guid2) (!memcmp(&(guid1), &(guid2), sizeof(GUID))) +#else +# define IsEqualGUID(guid1, guid2) (!memcmp(guid1, guid2, sizeof(GUID))) +#endif + +#elif !defined(__WIDL__) + +# include + +#endif /* _WIN32 */ + + +/* Define DECLSPEC_HIDDEN */ +#ifndef DECLSPEC_HIDDEN +# if defined(__MINGW32__) +# define DECLSPEC_HIDDEN +# elif defined(__GNUC__) +# define DECLSPEC_HIDDEN __attribute__((visibility("hidden"))) +# else +# define DECLSPEC_HIDDEN +# endif +#endif /* DECLSPEC_HIDDEN */ + +/* Define min() & max() macros */ +#ifndef NOMINMAX +# ifndef min 
+# define min(a, b) (((a) <= (b)) ? (a) : (b)) +# endif + +# ifndef max +# define max(a, b) (((a) >= (b)) ? (a) : (b)) +# endif +#endif /* NOMINMAX */ + +#ifndef DEFINE_ENUM_FLAG_OPERATORS +#ifdef __cplusplus +# define DEFINE_ENUM_FLAG_OPERATORS(type) \ +extern "C++" \ +{ \ + inline type operator &(type x, type y) { return (type)((int)x & (int)y); } \ + inline type operator &=(type &x, type y) { return (type &)((int &)x &= (int)y); } \ + inline type operator ~(type x) { return (type)~(int)x; } \ + inline type operator |(type x, type y) { return (type)((int)x | (int)y); } \ + inline type operator |=(type &x, type y) { return (type &)((int &)x |= (int)y); } \ + inline type operator ^(type x, type y) { return (type)((int)x ^ (int)y); } \ + inline type operator ^=(type &x, type y) { return (type &)((int &)x ^= (int)y); } \ +} +#else +# define DEFINE_ENUM_FLAG_OPERATORS(type) +#endif +#endif /* DEFINE_ENUM_FLAG_OPERATORS */ + +#endif /* _INC_WINDOWS */ +#endif /* __VKD3D_WINDOWS_H */ diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c index 30205088b1b..ce00e536d39 100644 --- a/libs/vkd3d/libs/vkd3d-common/blob.c +++ b/libs/vkd3d/libs/vkd3d-common/blob.c @@ -17,6 +17,7 @@ */ #define COBJMACROS + #include "vkd3d.h" #include "vkd3d_blob.h" #include "vkd3d_debug.h" diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c index 499334a35f1..b363efbd360 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c +++ b/libs/vkd3d/libs/vkd3d-common/debug.c @@ -40,9 +40,9 @@ #define VKD3D_DEBUG_BUFFER_COUNT 64 #define VKD3D_DEBUG_BUFFER_SIZE 512 -extern const char *vkd3d_dbg_env_name; +extern const char *const vkd3d_dbg_env_name; -static const char *debug_level_names[] = +static const char *const debug_level_names[] = { /* VKD3D_DBG_LEVEL_NONE */ "none", /* VKD3D_DBG_LEVEL_ERR */ "err", diff --git a/libs/vkd3d/libs/vkd3d-shader/trace.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c similarity index 98% rename from 
libs/vkd3d/libs/vkd3d-shader/trace.c rename to libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 6cd2dcb270c..0a821b5c878 100644 --- a/libs/vkd3d/libs/vkd3d-shader/trace.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -109,6 +109,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_DEQ ] = "deq", [VKD3DSIH_DFMA ] = "dfma", [VKD3DSIH_DGE ] = "dge", + [VKD3DSIH_DISCARD ] = "discard", [VKD3DSIH_DIV ] = "div", [VKD3DSIH_DLT ] = "dlt", [VKD3DSIH_DMAX ] = "dmax", @@ -645,7 +646,7 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, break; case VKD3D_SHADER_RESOURCE_TEXTURE_3D: - shader_addline(buffer, "_3d"); + shader_addline(buffer, "_volume"); break; case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: @@ -660,8 +661,9 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, else if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE || semantic->resource.reg.reg.type == VKD3DSPR_UAV) { if (semantic->resource.reg.reg.type == VKD3DSPR_RESOURCE) - shader_addline(buffer, "_resource_"); + shader_addline(buffer, "_resource"); + shader_addline(buffer, "_"); shader_dump_resource_type(compiler, semantic->resource_type); if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) @@ -1505,9 +1507,9 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile { case VKD3DSIH_BREAKP: case VKD3DSIH_CONTINUEP: + case VKD3DSIH_DISCARD: case VKD3DSIH_IF: case VKD3DSIH_RETP: - case VKD3DSIH_TEXKILL: switch (ins->flags) { case VKD3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(buffer, "_nz"); break; @@ -1857,11 +1859,11 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, shader_addline(buffer, "\n"); } -enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out) +enum vkd3d_result vkd3d_dxbc_binary_to_text(const 
struct vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out) { enum vkd3d_shader_compile_option_formatting_flags formatting; - struct vkd3d_shader_version *shader_version; struct vkd3d_d3d_asm_compiler compiler; enum vkd3d_result result = VKD3D_OK; struct vkd3d_string_buffer *buffer; @@ -1919,16 +1921,16 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, buffer = &compiler.buffer; vkd3d_string_buffer_init(buffer); + compiler.shader_version = *shader_version; shader_version = &compiler.shader_version; - *shader_version = parser->shader_version; vkd3d_string_buffer_printf(buffer, "%s%s_%u_%u%s\n", compiler.colours.version, shader_get_type_prefix(shader_version->type), shader_version->major, shader_version->minor, compiler.colours.reset); indent = 0; - for (i = 0; i < parser->instructions.count; ++i) + for (i = 0; i < instructions->count; ++i) { - struct vkd3d_shader_instruction *ins = &parser->instructions.elements[i]; + struct vkd3d_shader_instruction *ins = &instructions->elements[i]; switch (ins->handler_idx) { @@ -1981,12 +1983,13 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, return result; } -void vkd3d_shader_trace(struct vkd3d_shader_parser *parser) +void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version) { const char *p, *q, *end; struct vkd3d_shader_code code; - if (vkd3d_dxbc_binary_to_text(parser, NULL, &code) != VKD3D_OK) + if (vkd3d_dxbc_binary_to_text(instructions, shader_version, NULL, &code) != VKD3D_OK) return; end = (const char *)code.code + code.size; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index ed81137d225..712613ac13b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -1,4 +1,6 
@@ /* + * d3dbc (Direct3D shader models 1-3 bytecode) support + * * Copyright 2002-2003 Jason Edmeades * Copyright 2002-2003 Raphael Junqueira * Copyright 2004 Christian Costa @@ -6,6 +8,7 @@ * Copyright 2006 Ivan Gyurdiev * Copyright 2007-2008 Stefan Dösinger for CodeWeavers * Copyright 2009, 2021 Henri Verbeet for CodeWeavers + * Copyright 2019-2020 Zebediah Figura for CodeWeavers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -22,7 +25,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ -#include "vkd3d_shader_private.h" +#include "hlsl.h" #define VKD3D_SM1_VS 0xfffeu #define VKD3D_SM1_PS 0xffffu @@ -207,7 +210,7 @@ struct vkd3d_sm1_opcode_info struct vkd3d_shader_sm1_parser { const struct vkd3d_sm1_opcode_info *opcode_table; - const uint32_t *start, *end; + const uint32_t *start, *end, *ptr; bool abort; struct vkd3d_shader_parser p; @@ -462,6 +465,7 @@ static void shader_sm1_parse_src_param(uint32_t param, const struct vkd3d_shader src->reg.idx[1].rel_addr = NULL; src->reg.idx[2].offset = ~0u; src->reg.idx[2].rel_addr = NULL; + src->reg.idx_count = 1; src->swizzle = swizzle_from_sm1((param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT); src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT; } @@ -480,6 +484,7 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader dst->reg.idx[1].rel_addr = NULL; dst->reg.idx[2].offset = ~0u; dst->reg.idx[2].rel_addr = NULL; + dst->reg.idx_count = 1; dst->write_mask = (param & VKD3D_SM1_WRITEMASK_MASK) >> VKD3D_SM1_WRITEMASK_SHIFT; dst->modifiers = (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT; dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; @@ -661,6 +666,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const src_param->reg.idx[1].rel_addr = NULL; 
src_param->reg.idx[2].offset = ~0u; src_param->reg.idx[2].rel_addr = NULL; + src_param->reg.idx_count = 0; src_param->reg.immconst_type = type; memcpy(src_param->reg.u.immconst_uint, *ptr, count * sizeof(uint32_t)); src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; @@ -671,7 +677,7 @@ static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) { - const uint32_t **ptr = &sm1->p.ptr; + const uint32_t **ptr = &sm1->ptr; const char *comment; unsigned int size; size_t remaining; @@ -738,13 +744,12 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, } } -static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *ins) +static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) { - struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); struct vkd3d_shader_src_param *src_params, *predicate; const struct vkd3d_sm1_opcode_info *opcode_info; struct vkd3d_shader_dst_param *dst_param; - const uint32_t **ptr = &parser->ptr; + const uint32_t **ptr = &sm1->ptr; uint32_t opcode_token; const uint32_t *p; bool predicated; @@ -758,11 +763,11 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru goto fail; } - ++parser->location.line; + ++sm1->p.location.line; opcode_token = read_u32(ptr); if (!(opcode_info = shader_sm1_get_opcode_info(sm1, opcode_token & VKD3D_SM1_OPCODE_MASK))) { - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, "Invalid opcode %#x (token 0x%08x, shader version %u.%u).", opcode_token & VKD3D_SM1_OPCODE_MASK, opcode_token, sm1->p.shader_version.major, sm1->p.shader_version.minor); @@ -775,14 +780,14 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_parser *parser, stru ins->raw = 
false; ins->structured = false; predicated = !!(opcode_token & VKD3D_SM1_INSTRUCTION_PREDICATED); - ins->predicate = predicate = predicated ? shader_parser_get_src_params(parser, 1) : NULL; + ins->predicate = predicate = predicated ? shader_parser_get_src_params(&sm1->p, 1) : NULL; ins->dst_count = opcode_info->dst_count; - ins->dst = dst_param = shader_parser_get_dst_params(parser, ins->dst_count); + ins->dst = dst_param = shader_parser_get_dst_params(&sm1->p, ins->dst_count); ins->src_count = opcode_info->src_count; - ins->src = src_params = shader_parser_get_src_params(parser, ins->src_count); + ins->src = src_params = shader_parser_get_src_params(&sm1->p, ins->src_count); if ((!predicate && predicated) || (!src_params && ins->src_count) || (!dst_param && ins->dst_count)) { - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory."); goto fail; } @@ -852,10 +857,9 @@ fail: *ptr = sm1->end; } -static bool shader_sm1_is_end(struct vkd3d_shader_parser *parser) +static bool shader_sm1_is_end(struct vkd3d_shader_sm1_parser *sm1) { - struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); - const uint32_t **ptr = &parser->ptr; + const uint32_t **ptr = &sm1->ptr; shader_sm1_read_comment(sm1); @@ -938,7 +942,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, shader_desc = &sm1->p.shader_desc; shader_desc->byte_code = code; shader_desc->byte_code_size = code_size; - sm1->p.ptr = sm1->start; + sm1->ptr = sm1->start; return VKD3D_OK; } @@ -965,7 +969,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi } instructions = &sm1->p.instructions; - while (!shader_sm1_is_end(&sm1->p)) + while (!shader_sm1_is_end(sm1)) { if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) { @@ -975,7 +979,7 @@ int vkd3d_shader_sm1_parser_create(const struct 
vkd3d_shader_compile_info *compi return VKD3D_ERROR_OUT_OF_MEMORY; } ins = &instructions->elements[instructions->count]; - shader_sm1_read_instruction(&sm1->p, ins); + shader_sm1_read_instruction(sm1, ins); if (ins->handler_idx == VKD3DSIH_INVALID) { @@ -988,5 +992,1094 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi *parser = &sm1->p; - return VKD3D_OK; + return sm1->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; +} + +bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) +{ + unsigned int i; + + static const struct + { + const char *semantic; + bool output; + enum vkd3d_shader_type shader_type; + unsigned int major_version; + D3DSHADER_PARAM_REGISTER_TYPE type; + DWORD offset; + } + register_table[] = + { + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, + {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, + {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, + {"fog", true, 
VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { + if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) + && output == register_table[i].output + && ctx->profile->type == register_table[i].shader_type + && ctx->profile->major_version == register_table[i].major_version) + { + *type = register_table[i].type; + if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) + *reg = register_table[i].offset; + else + *reg = semantic->index; + return true; + } + } + + return false; +} + +bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) +{ + static const struct + { + const char *name; + D3DDECLUSAGE usage; + } + semantics[] = + { + {"binormal", D3DDECLUSAGE_BINORMAL}, + {"blendindices", D3DDECLUSAGE_BLENDINDICES}, + {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, + {"color", D3DDECLUSAGE_COLOR}, + {"depth", D3DDECLUSAGE_DEPTH}, + {"fog", D3DDECLUSAGE_FOG}, + {"normal", D3DDECLUSAGE_NORMAL}, + {"position", D3DDECLUSAGE_POSITION}, + {"positiont", D3DDECLUSAGE_POSITIONT}, + {"psize", D3DDECLUSAGE_PSIZE}, + {"sample", 
D3DDECLUSAGE_SAMPLE}, + {"sv_depth", D3DDECLUSAGE_DEPTH}, + {"sv_position", D3DDECLUSAGE_POSITION}, + {"sv_target", D3DDECLUSAGE_COLOR}, + {"tangent", D3DDECLUSAGE_TANGENT}, + {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, + {"texcoord", D3DDECLUSAGE_TEXCOORD}, + }; + + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { + if (!ascii_strcasecmp(semantic->name, semantics[i].name)) + { + *usage = semantics[i].usage; + *usage_idx = semantic->index; + return true; + } + } + + return false; +} + +static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) +{ + if (type == VKD3D_SHADER_TYPE_VERTEX) + return D3DVS_VERSION(major, minor); + else + return D3DPS_VERSION(major, minor); +} + +static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) +{ + switch (type->class) + { + case HLSL_CLASS_ARRAY: + return sm1_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3DXPC_MATRIX_COLUMNS; + else + return D3DXPC_MATRIX_ROWS; + case HLSL_CLASS_OBJECT: + return D3DXPC_OBJECT; + case HLSL_CLASS_SCALAR: + return D3DXPC_SCALAR; + case HLSL_CLASS_STRUCT: + return D3DXPC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3DXPC_VECTOR; + default: + ERR("Invalid class %#x.\n", type->class); + vkd3d_unreachable(); + } +} + +static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) +{ + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + return D3DXPT_BOOL; + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3DXPT_FLOAT; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return D3DXPT_INT; + case HLSL_TYPE_PIXELSHADER: + return D3DXPT_PIXELSHADER; + case HLSL_TYPE_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_SAMPLER1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_SAMPLER2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_SAMPLER3D; + case HLSL_SAMPLER_DIM_CUBE: + return 
D3DXPT_SAMPLERCUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_SAMPLER; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_STRING: + return D3DXPT_STRING; + case HLSL_TYPE_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_TEXTURECUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_TEXTURE; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + vkd3d_unreachable(); + } + break; + case HLSL_TYPE_VERTEXSHADER: + return D3DXPT_VERTEXSHADER; + case HLSL_TYPE_VOID: + return D3DXPT_VOID; + default: + vkd3d_unreachable(); + } +} + +static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) +{ + const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); + unsigned int array_size = hlsl_get_multiarray_size(type); + unsigned int field_count = 0; + size_t fields_offset = 0; + size_t i; + + if (type->bytecode_offset) + return; + + if (array_type->class == HLSL_CLASS_STRUCT) + { + field_count = array_type->e.record.field_count; + + for (i = 0; i < field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + field->name_bytecode_offset = put_string(buffer, field->name); + write_sm1_type(buffer, field->type, ctab_start); + } + + fields_offset = bytecode_align(buffer) - ctab_start; + + for (i = 0; i < field_count; ++i) + { + struct hlsl_struct_field *field = &array_type->e.record.fields[i]; + + put_u32(buffer, field->name_bytecode_offset - ctab_start); + put_u32(buffer, field->type->bytecode_offset - ctab_start); + } + } + + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); + put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); + 
put_u32(buffer, vkd3d_make_u32(array_size, field_count)); + put_u32(buffer, fields_offset); +} + +static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) +{ + struct hlsl_ir_var *var; + + list_remove(&to_sort->extern_entry); + + LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + { + if (strcmp(to_sort->name, var->name) < 0) + { + list_add_before(&var->extern_entry, &to_sort->extern_entry); + return; + } + } + + list_add_tail(sorted, &to_sort->extern_entry); +} + +static void sm1_sort_externs(struct hlsl_ctx *ctx) +{ + struct list sorted = LIST_INIT(sorted); + struct hlsl_ir_var *var, *next; + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform) + sm1_sort_extern(&sorted, var); + } + list_move_tail(&ctx->extern_vars, &sorted); +} + +static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + struct hlsl_ir_function_decl *entry_func) +{ + size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; + unsigned int uniform_count = 0; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int r; + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + if (var->semantic.name || !var->regs[r].allocated) + continue; + + ++uniform_count; + + if (var->is_param && var->is_uniform) + { + struct vkd3d_string_buffer *name; + + if (!(name = hlsl_get_string_buffer(ctx))) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + vkd3d_string_buffer_printf(name, "$%s", var->name); + vkd3d_free((char *)var->name); + var->name = hlsl_strdup(ctx, name->buffer); + hlsl_release_string_buffer(ctx, name); + } + } + } + + sm1_sort_externs(ctx); + + size_offset = put_u32(buffer, 0); + ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); + + ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); + creator_offset = put_u32(buffer, 0); + 
put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + put_u32(buffer, uniform_count); + put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ + put_u32(buffer, 0); /* FIXME: flags */ + put_u32(buffer, 0); /* FIXME: target string */ + + vars_start = bytecode_align(buffer); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int r; + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + if (var->semantic.name || !var->regs[r].allocated) + continue; + + put_u32(buffer, 0); /* name */ + if (r == HLSL_REGSET_NUMERIC) + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); + put_u32(buffer, var->data_type->reg_size[r] / 4); + } + else + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); + put_u32(buffer, var->regs[r].bind_count); + } + put_u32(buffer, 0); /* type */ + put_u32(buffer, 0); /* FIXME: default value */ + } + } + + uniform_count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int r; + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { + size_t var_offset, name_offset; + + if (var->semantic.name || !var->regs[r].allocated) + continue; + + var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); + + name_offset = put_string(buffer, var->name); + set_u32(buffer, var_offset, name_offset - ctab_start); + + write_sm1_type(buffer, var->data_type, ctab_start); + set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); + ++uniform_count; + } + } + + offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(buffer, creator_offset, offset - ctab_start); + + ctab_end = bytecode_align(buffer); + set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); +} + +static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) +{ + return ((type << 
D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) + | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); +} + +struct sm1_instruction +{ + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; + + struct sm1_dst_register + { + D3DSHADER_PARAM_REGISTER_TYPE type; + D3DSHADER_PARAM_DSTMOD_TYPE mod; + unsigned int writemask; + uint32_t reg; + } dst; + + struct sm1_src_register + { + D3DSHADER_PARAM_REGISTER_TYPE type; + D3DSHADER_PARAM_SRCMOD_TYPE mod; + unsigned int swizzle; + uint32_t reg; + } srcs[3]; + unsigned int src_count; + + unsigned int has_dst; +}; + +static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) +{ + assert(reg->writemask); + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); +} + +static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, + const struct sm1_src_register *reg) +{ + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); +} + +static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct sm1_instruction *instr) +{ + uint32_t token = instr->opcode; + unsigned int i; + + if (ctx->profile->major_version > 1) + token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + if (instr->has_dst) + write_sm1_dst_register(buffer, &instr->dst); + + for (i = 0; i < instr->src_count; ++i) + write_sm1_src_register(buffer, &instr->srcs[i]); +}; + +static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) +{ + src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); +} + +static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, + const struct hlsl_reg *src3) +{ + struct sm1_instruction instr = + { + .opcode = D3DSIO_DP2ADD, + + .dst.type 
= D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .srcs[2].type = D3DSPR_TEMP, + .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), + .srcs[2].reg = src3->id, + .src_count = 3, + }; + + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2) +{ + struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, + }; + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2) +{ + struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + 
.srcs[1].reg = src2->id, + .src_count = 2, + }; + + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) +{ + struct sm1_instruction instr = + { + .opcode = opcode, + + .dst.type = D3DSPR_TEMP, + .dst.mod = dst_mod, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), + .srcs[0].reg = src->id, + .srcs[0].mod = src_mod, + .src_count = 1, + }; + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &instr); +} + +static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + unsigned int i, x; + + for (i = 0; i < ctx->constant_defs.count; ++i) + { + uint32_t token = D3DSIO_DEF; + const struct sm1_dst_register reg = + { + .type = D3DSPR_CONST, + .writemask = VKD3DSP_WRITEMASK_ALL, + .reg = i, + }; + + if (ctx->profile->major_version > 1) + token |= 5 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + write_sm1_dst_register(buffer, &reg); + for (x = 0; x < 4; ++x) + put_f32(buffer, ctx->constant_defs.values[i].f[x]); + } +} + +static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_var *var, bool output) +{ + struct sm1_dst_register reg = {0}; + uint32_t token, usage_idx; + D3DDECLUSAGE usage; + bool ret; + + if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &reg.type, &reg.reg)) + { + usage = 0; + usage_idx = 0; + } + else + { + ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); + assert(ret); + reg.type = output ? 
D3DSPR_OUTPUT : D3DSPR_INPUT; + reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; + } + + token = D3DSIO_DCL; + if (ctx->profile->major_version > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + token = (1u << 31); + token |= usage << D3DSP_DCL_USAGE_SHIFT; + token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; + put_u32(buffer, token); + + reg.writemask = (1 << var->data_type->dimx) - 1; + write_sm1_dst_register(buffer, &reg); +} + +static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + bool write_in = false, write_out = false; + struct hlsl_ir_var *var; + + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) + write_in = true; + else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) + write_in = write_out = true; + else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) + write_in = true; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (write_in && var->is_input_semantic) + write_sm1_semantic_dcl(ctx, buffer, var, false); + if (write_out && var->is_output_semantic) + write_sm1_semantic_dcl(ctx, buffer, var, true); + } +} + +static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) +{ + struct sm1_dst_register reg = {0}; + uint32_t token, res_type = 0; + + token = D3DSIO_DCL; + if (ctx->profile->major_version > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + switch (sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_1D; + break; + + case HLSL_SAMPLER_DIM_2D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D; + break; + + case HLSL_SAMPLER_DIM_CUBE: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_CUBE; + break; + + case HLSL_SAMPLER_DIM_3D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_3D; + break; + + default: + 
vkd3d_unreachable(); + break; + } + + token = (1u << 31); + token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; + put_u32(buffer, token); + + reg.type = D3DSPR_SAMPLER; + reg.writemask = VKD3DSP_WRITEMASK_ALL; + reg.reg = reg_id; + + write_sm1_dst_register(buffer, &reg); +} + +static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + enum hlsl_sampler_dim sampler_dim; + unsigned int i, count, reg_id; + struct hlsl_ir_var *var; + + if (ctx->profile->major_version < 2) + return; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) + continue; + + count = var->regs[HLSL_REGSET_SAMPLERS].bind_count; + + for (i = 0; i < count; ++i) + { + if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + { + sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; + assert(sampler_dim != HLSL_SAMPLER_DIM_GENERIC); + + reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; + write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); + } + } + } +} + +static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_CONST, + .srcs[0].reg = constant->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), + .src_count = 1, + }; + + assert(instr->reg.allocated); + assert(constant->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) +{ 
+ struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + struct hlsl_ir_node *arg1 = expr->operands[0].node; + unsigned int i; + + for (i = 0; i < instr->data_type->dimx; ++i) + { + struct hlsl_reg src = arg1->reg, dst = instr->reg; + + src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); + dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); + write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); + } +} + +static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + struct hlsl_ir_node *arg1 = expr->operands[0].node; + struct hlsl_ir_node *arg2 = expr->operands[1].node; + struct hlsl_ir_node *arg3 = expr->operands[2].node; + + assert(instr->reg.allocated); + + if (instr->data_type->base_type != HLSL_TYPE_FLOAT) + { + /* These need to be lowered. */ + hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); + return; + } + + switch (expr->op) + { + case HLSL_OP1_ABS: + write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSX: + write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSY: + write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_EXP2: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); + break; + + case HLSL_OP1_LOG2: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); + break; + + case HLSL_OP1_NEG: + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); + break; + + case HLSL_OP1_SAT: + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); + break; + + case HLSL_OP1_RCP: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); + break; + + case HLSL_OP1_RSQ: + write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); + break; + + case HLSL_OP2_ADD: 
+ write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MAX: + write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MIN: + write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MUL: + write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP1_FRACT: + write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); + break; + + case HLSL_OP2_DOT: + switch (arg1->data_type->dimx) + { + case 4: + write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case 3: + write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_OP3_DP2ADD: + write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); + break; + } +} + +static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_load *load = hlsl_ir_load(instr); + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), + .src_count = 1, + }; + + assert(instr->reg.allocated); + + if (load->src.var->is_uniform) + { + assert(reg.allocated); + sm1_instr.srcs[0].type = D3DSPR_CONST; + } + else if (load->src.var->is_input_semantic) + { + if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, + false, 
&sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) + { + assert(reg.allocated); + sm1_instr.srcs[0].type = D3DSPR_INPUT; + sm1_instr.srcs[0].reg = reg.id; + } + else + sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); + } + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + struct hlsl_ir_node *coords = load->coords.node; + unsigned int sampler_offset, reg_id; + struct sm1_instruction sm1_instr; + + sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); + reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; + + sm1_instr = (struct sm1_instruction) + { + .opcode = D3DSIO_TEX, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = coords->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + + .srcs[1].type = D3DSPR_SAMPLER, + .srcs[1].reg = reg_id, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + + .src_count = 2, + }; + + assert(instr->reg.allocated); + + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_store *store = hlsl_ir_store(instr); + const struct hlsl_ir_node *rhs = store->rhs.node; + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = reg.id, + .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), + .has_dst = 1, + + .srcs[0].type = 
D3DSPR_TEMP, + .srcs[0].reg = rhs->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), + .src_count = 1, + }; + + if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) + { + FIXME("Matrix writemasks need to be lowered.\n"); + return; + } + + if (store->lhs.var->is_output_semantic) + { + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) + { + sm1_instr.dst.type = D3DSPR_TEMP; + sm1_instr.dst.reg = 0; + } + else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, + true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) + { + assert(reg.allocated); + sm1_instr.dst.type = D3DSPR_OUTPUT; + sm1_instr.dst.reg = reg.id; + } + else + sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; + } + else + assert(reg.allocated); + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); + const struct hlsl_ir_node *val = swizzle->val.node; + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = val->reg.id, + .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), + swizzle->swizzle, instr->data_type->dimx), + .src_count = 1, + }; + + assert(instr->reg.allocated); + assert(val->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + +static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_function_decl *entry_func) +{ + const struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, 
&entry_func->body.instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class == HLSL_CLASS_MATRIX) + { + /* These need to be lowered. */ + hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); + continue; + } + else if (instr->data_type->class == HLSL_CLASS_OBJECT) + { + hlsl_fixme(ctx, &instr->loc, "Object copy."); + break; + } + + assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); + } + + switch (instr->type) + { + case HLSL_IR_CALL: + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: + write_sm1_constant(ctx, buffer, instr); + break; + + case HLSL_IR_EXPR: + write_sm1_expr(ctx, buffer, instr); + break; + + case HLSL_IR_LOAD: + write_sm1_load(ctx, buffer, instr); + break; + + case HLSL_IR_RESOURCE_LOAD: + write_sm1_resource_load(ctx, buffer, instr); + break; + + case HLSL_IR_STORE: + write_sm1_store(ctx, buffer, instr); + break; + + case HLSL_IR_SWIZZLE: + write_sm1_swizzle(ctx, buffer, instr); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + } + } +} + +int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +{ + struct vkd3d_bytecode_buffer buffer = {0}; + int ret; + + put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + + write_sm1_uniforms(ctx, &buffer, entry_func); + + write_sm1_constant_defs(ctx, &buffer); + write_sm1_semantic_dcls(ctx, &buffer); + write_sm1_sampler_dcls(ctx, &buffer); + write_sm1_instructions(ctx, &buffer, entry_func); + + put_u32(&buffer, D3DSIO_END); + + if (!(ret = buffer.status)) + { + out->code = buffer.data; + out->size = buffer.size; + } + return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index d99ea2e36b6..3e3f06faeb5 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ 
-19,1680 +19,74 @@ */ #include "vkd3d_shader_private.h" -#include "sm4.h" -#define SM4_MAX_SRC_COUNT 6 -#define SM4_MAX_DST_COUNT 2 - -STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); - -void dxbc_writer_init(struct dxbc_writer *dxbc) -{ - memset(dxbc, 0, sizeof(*dxbc)); -} - -void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) -{ - struct vkd3d_shader_dxbc_section_desc *section; - - assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); - - section = &dxbc->sections[dxbc->section_count++]; - section->tag = tag; - section->data.code = data; - section->data.size = size; -} - -int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, - struct vkd3d_shader_code *dxbc, char **messages) -{ - size_t size_position, offsets_position, checksum_position, i; - struct vkd3d_bytecode_buffer buffer = {0}; - uint32_t checksum[4]; - - TRACE("section_count %zu, sections %p, dxbc %p, messages %p.\n", section_count, sections, dxbc, messages); - - if (messages) - *messages = NULL; - - put_u32(&buffer, TAG_DXBC); - - checksum_position = bytecode_get_size(&buffer); - for (i = 0; i < 4; ++i) - put_u32(&buffer, 0); - - put_u32(&buffer, 1); /* version */ - size_position = put_u32(&buffer, 0); - put_u32(&buffer, section_count); - - offsets_position = bytecode_get_size(&buffer); - for (i = 0; i < section_count; ++i) - put_u32(&buffer, 0); - - for (i = 0; i < section_count; ++i) - { - set_u32(&buffer, offsets_position + i * sizeof(uint32_t), bytecode_get_size(&buffer)); - put_u32(&buffer, sections[i].tag); - put_u32(&buffer, sections[i].data.size); - bytecode_put_bytes(&buffer, sections[i].data.code, sections[i].data.size); - } - set_u32(&buffer, size_position, bytecode_get_size(&buffer)); - - vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); - for (i = 0; i < 4; ++i) - set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); - - if (!buffer.status) - { 
- dxbc->code = buffer.data; - dxbc->size = buffer.size; - } - return buffer.status; -} - -int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) -{ - return vkd3d_shader_serialize_dxbc(dxbc->section_count, dxbc->sections, out, NULL); -} - -struct vkd3d_shader_src_param_entry -{ - struct list entry; - struct vkd3d_shader_src_param param; -}; - -struct vkd3d_shader_sm4_parser -{ - const uint32_t *start, *end; - - unsigned int output_map[MAX_REG_OUTPUT]; - - struct vkd3d_shader_parser p; -}; - -struct vkd3d_sm4_opcode_info -{ - enum vkd3d_sm4_opcode opcode; - enum vkd3d_shader_opcode handler_idx; - char dst_info[SM4_MAX_DST_COUNT]; - char src_info[SM4_MAX_SRC_COUNT]; - void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); -}; - -static const enum vkd3d_primitive_type output_primitive_type_table[] = -{ - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ VKD3D_PT_TRIANGLESTRIP, -}; - -static const enum vkd3d_primitive_type input_primitive_type_table[] = -{ - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, - /* VKD3D_SM4_INPUT_PT_LINE */ VKD3D_PT_LINELIST, - /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* UNKNOWN */ VKD3D_PT_UNDEFINED, - /* VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, - /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, -}; - -static const enum vkd3d_shader_resource_type resource_type_table[] = -{ - /* 0 */ VKD3D_SHADER_RESOURCE_NONE, - /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, - /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, - /* 
VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, - /* VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, - /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, - /* VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, - /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, - /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, - /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, - /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, - /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, - /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, -}; - -static const enum vkd3d_data_type data_type_table[] = -{ - /* 0 */ VKD3D_DATA_FLOAT, - /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, - /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, - /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, - /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, - /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, - /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, - /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, - /* VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, - /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, -}; - -static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) -{ - return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); -} - -static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) -{ - const struct vkd3d_shader_version *version = &sm4->p.shader_version; - - return version->major >= 5 && version->minor >= 1; -} - -static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); -static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t 
*end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param); - -static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, - const uint32_t **ptr, const uint32_t *end, unsigned int *register_space) -{ - *register_space = 0; - - if (!shader_is_sm_5_1(priv)) - return true; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - - *register_space = *(*ptr)++; - return true; -} - -static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, - (struct vkd3d_shader_src_param *)&ins->src[0]); - ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? - VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; -} - -static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_immediate_constant_buffer *icb; - enum vkd3d_sm4_shader_data_type type; - unsigned int icb_size; - - type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; - if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) - { - FIXME("Ignoring shader data type %#x.\n", type); - ins->handler_idx = VKD3DSIH_NOP; - return; - } - - ++tokens; - icb_size = token_count - 1; - if (icb_size % 4) - { - FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - - if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) - { - ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); - vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of 
memory."); - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - icb->vec4_count = icb_size / 4; - memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); - shader_instruction_array_add_icb(&priv->p.instructions, icb); - ins->declaration.icb = icb; -} - -static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, - const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) -{ - range->first = reg->idx[1].offset; - range->last = reg->idx[shader_is_sm_5_1(sm4) ? 2 : 1].offset; - if (range->last < range->first) - { - FIXME("Invalid register range [%u:%u].\n", range->first, range->last); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, - "Last register %u must not be less than first register %u in range.\n", range->last, range->first); - } -} - -static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; - enum vkd3d_sm4_resource_type resource_type; - const uint32_t *end = &tokens[token_count]; - enum vkd3d_sm4_data_type data_type; - enum vkd3d_data_type reg_data_type; - DWORD components; - unsigned int i; - - resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; - if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) - { - FIXME("Unhandled resource type %#x.\n", resource_type); - semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; - } - else - { - semantic->resource_type = resource_type_table[resource_type]; - } - - if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS - || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) - { - semantic->sample_count = (opcode_token & VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK) >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } - - reg_data_type 
= opcode == VKD3D_SM4_OP_DCL_RESOURCE ? VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; - shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); - - components = *tokens++; - for (i = 0; i < VKD3D_VEC4_SIZE; i++) - { - data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); - - if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) - { - FIXME("Unhandled data type %#x.\n", data_type); - semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; - } - else - { - semantic->resource_data_type[i] = data_type_table[data_type]; - } - } - - if (reg_data_type == VKD3D_DATA_UAV) - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - - shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); -} - -static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); - shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); - if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) - ins->flags |= VKD3DSI_INDEXED_DYNAMIC; - - ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; - ins->declaration.cb.range.space = 0; - - if (shader_is_sm_5_1(priv)) - { - if (tokens >= end) - { - FIXME("Invalid ptr %p >= end %p.\n", tokens, end); - return; - } - - ins->declaration.cb.size = *tokens++; - shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); - } -} - -static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct 
vkd3d_shader_sm4_parser *priv) -{ - const uint32_t *end = &tokens[token_count]; - - ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; - if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) - FIXME("Unhandled sampler mode %#x.\n", ins->flags); - shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); - shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); - shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); -} - -static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, - &ins->declaration.index_range.dst); - ins->declaration.index_range.register_count = *tokens; -} - -static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - enum vkd3d_sm4_output_primitive_type primitive_type; - - primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; - if (primitive_type >= ARRAY_SIZE(output_primitive_type_table)) - ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; - else - ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type]; - - if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) - FIXME("Unhandled output primitive type %#x.\n", primitive_type); -} - -static void shader_sm4_read_dcl_input_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - enum vkd3d_sm4_input_primitive_type 
primitive_type; - - primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; - if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) - { - ins->declaration.primitive_type.type = VKD3D_PT_PATCH; - ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; - } - else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) - { - ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; - } - else - { - ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type]; - } - - if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) - FIXME("Unhandled input primitive type %#x.\n", primitive_type); -} - -static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.count = *tokens; -} - -static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); -} - -static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, - &ins->declaration.register_semantic.reg); - ins->declaration.register_semantic.sysval_semantic = *tokens; -} - -static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - 
ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); -} - -static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, - &ins->declaration.register_semantic.reg); - ins->declaration.register_semantic.sysval_semantic = *tokens; -} - -static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.indexable_temp.register_idx = *tokens++; - ins->declaration.indexable_temp.register_size = *tokens++; - ins->declaration.indexable_temp.component_count = *tokens; -} - -static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; -} - -static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_src_param *src_params = (struct vkd3d_shader_src_param *)ins->src; - src_params[0].reg.u.fp_body_idx = *tokens++; - shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, &src_params[0]); -} - -static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t 
opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.index = *tokens; -} - -static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.index = *tokens++; - FIXME("Ignoring set of function bodies (count %u).\n", *tokens); -} - -static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.fp.index = *tokens++; - ins->declaration.fp.body_count = *tokens++; - ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT; - ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK; - FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count); -} - -static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) - >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; -} - -static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -} - -static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - 
ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -} - -static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -} - -static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.max_tessellation_factor = *(float *)tokens; -} - -static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->declaration.thread_group_size.x = *tokens++; - ins->declaration.thread_group_size.y = *tokens++; - ins->declaration.thread_group_size.z = *tokens++; -} - -static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t 
opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; - resource->byte_stride = *tokens++; - if (resource->byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg); - ins->declaration.tgsm_raw.byte_count = *tokens; - if (ins->declaration.tgsm_raw.byte_count % 4) - FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); -} - -static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, - &ins->declaration.tgsm_structured.reg); - ins->declaration.tgsm_structured.byte_stride = *tokens++; - ins->declaration.tgsm_structured.structure_count = *tokens; - if (ins->declaration.tgsm_structured.byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); -} - -static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction 
*ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - resource->byte_stride = *tokens++; - if (resource->byte_stride % 4) - FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, - uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; - const uint32_t *end = &tokens[token_count]; - - shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg); - shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); - shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -} - -static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -{ - ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT; -} - -/* - * d -> VKD3D_DATA_DOUBLE - * f -> VKD3D_DATA_FLOAT - * i -> VKD3D_DATA_INT - * u -> VKD3D_DATA_UINT - * O -> VKD3D_DATA_OPAQUE - * R -> VKD3D_DATA_RESOURCE - * S -> VKD3D_DATA_SAMPLER - * U -> VKD3D_DATA_UAV - */ -static const struct vkd3d_sm4_opcode_info opcode_table[] = -{ - {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"}, - 
{VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, - {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, - {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"}, - {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, - {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, - {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, - {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, - {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, - {VKD3D_SM4_OP_DISCARD, VKD3DSIH_TEXKILL, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, - {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, - {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, - {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"}, - {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""}, - {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""}, - {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""}, - {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""}, - {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""}, - {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"}, - {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"}, - {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"}, - {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"}, - {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"}, - {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", "ff"}, - {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, - {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, - {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, - {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, - {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"}, - {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"}, - {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN, "i", "ii"}, - {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, - {VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, - {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, - {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, - 
{VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, - {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, - {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, - {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, - {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, - {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, - {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, - {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, - {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, - {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, - {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, - {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", - shader_sm4_read_shader_data}, - {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, - {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, - {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, - {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, - {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, - {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, - {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, - {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, - {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, - {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", - shader_sm4_read_conditional_op}, - {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, - {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, - {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, - {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, - {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, - {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, - {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, - {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, - {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, - {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, - {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, - {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, - {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, - {VKD3D_SM4_OP_ULT, 
VKD3DSIH_ULT, "u", "uu"}, - {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, - {VKD3D_SM4_OP_UMUL, VKD3DSIH_UMUL, "uu", "uu"}, - {VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, - {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, - {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, - {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, - {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, - {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "", "", - shader_sm4_read_dcl_resource}, - {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", - shader_sm4_read_dcl_constant_buffer}, - {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", - shader_sm4_read_dcl_sampler}, - {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", - shader_sm4_read_dcl_index_range}, - {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", - shader_sm4_read_dcl_output_topology}, - {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", - shader_sm4_read_dcl_input_primitive}, - {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", - shader_sm4_read_declaration_dst}, - {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", - shader_sm4_read_dcl_input_ps}, - {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, VKD3DSIH_DCL_INPUT_PS_SIV, "", "", - shader_sm4_read_dcl_input_ps_siv}, - {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", - shader_sm4_read_declaration_dst}, - {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", - shader_sm4_read_declaration_register_semantic}, - {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", - 
shader_sm4_read_declaration_count}, - {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", - shader_sm4_read_dcl_indexable_temp}, - {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", - shader_sm4_read_dcl_global_flags}, - {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, - {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, - {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, - {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, - {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, - {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, - {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, - {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, - {VKD3D_SM5_OP_EMIT_STREAM, VKD3DSIH_EMIT_STREAM, "", "f"}, - {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, - {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", - shader_sm5_read_fcall}, - {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, - {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, - {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, - {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, - {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, - {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, - {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, - {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, - {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, - {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, - {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, - {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, - {VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, - {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, - {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, - {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, - 
{VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, - {VKD3D_SM5_OP_SWAPC, VKD3DSIH_SWAPC, "ff", "uff"}, - {VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, - {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", - shader_sm5_read_dcl_function_body}, - {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", - shader_sm5_read_dcl_function_table}, - {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", - shader_sm5_read_dcl_interface}, - {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", - shader_sm5_read_control_point_count}, - {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", - shader_sm5_read_control_point_count}, - {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", - shader_sm5_read_dcl_tessellator_domain}, - {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", - shader_sm5_read_dcl_tessellator_partitioning}, - {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", - shader_sm5_read_dcl_tessellator_output_primitive}, - {VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", - shader_sm5_read_dcl_hs_max_tessfactor}, - {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", - shader_sm5_read_dcl_thread_group}, - {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", - shader_sm4_read_dcl_resource}, - {VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", - shader_sm5_read_dcl_uav_raw}, - {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", - shader_sm5_read_dcl_uav_structured}, - 
{VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", - shader_sm5_read_dcl_tgsm_raw}, - {VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", - shader_sm5_read_dcl_tgsm_structured}, - {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", - shader_sm5_read_dcl_resource_raw}, - {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", - shader_sm5_read_dcl_resource_structured}, - {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, - {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, - {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, - {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, - {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, - {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, - {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, - {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, - {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, - {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, - {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_XOR, VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, - 
{VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, - {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, - {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, - {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", - shader_sm5_read_sync}, - {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, - {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, - {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, - {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, - {VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, - {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, - {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, - {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, - {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, - {VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, - {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, - {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, - {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, - {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, - {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3DSIH_DCL_GS_INSTANCES, "", "", - shader_sm4_read_declaration_count}, - {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, - {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, - {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, - {VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"}, - {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, - {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, - {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, - {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, - {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, - {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, - {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, - {VKD3D_SM5_OP_GATHER4_PO_C_S, VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"}, - {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"}, - {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"}, - {VKD3D_SM5_OP_LD_UAV_TYPED_S, 
VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"}, - {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"}, - {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"}, - {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"}, - {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"}, - {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"}, - {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"}, - {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, -}; - -static const enum vkd3d_shader_register_type register_type_table[] = -{ - /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, - /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, - /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, - /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, - /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, - /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, - /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER, - /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, - /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, - /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, - /* UNKNOWN */ ~0u, - /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, - /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, - /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, - /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, - /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, - /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, - /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, - /* UNKNOWN */ ~0u, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, - /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, - /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, - /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, - 
/* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, - /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, - /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, - /* UNKNOWN */ ~0u, - /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, - /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, - /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, - /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, - /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, - /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, - /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, - /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, - /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, - /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, - /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, - /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, -}; - -static const enum vkd3d_shader_register_precision register_precision_table[] = -{ - /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10, - /* UNKNOWN */ VKD3D_SHADER_REGISTER_PRECISION_INVALID, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16, - /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, -}; - -static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) -{ - unsigned int i; - - for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) - { - if (opcode == opcode_table[i].opcode) return &opcode_table[i]; - } - - return NULL; -} - -static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) -{ - switch (sm4->p.shader_version.type) - { - case VKD3D_SHADER_TYPE_PIXEL: - if (reg->type == VKD3DSPR_OUTPUT) - { - 
unsigned int reg_idx = reg->idx[0].offset; - - if (reg_idx >= ARRAY_SIZE(sm4->output_map)) - { - ERR("Invalid output index %u.\n", reg_idx); - break; - } - - reg->type = VKD3DSPR_COLOROUT; - reg->idx[0].offset = sm4->output_map[reg_idx]; - } - break; - - default: - break; - } -} - -static enum vkd3d_data_type map_data_type(char t) -{ - switch (t) - { - case 'd': - return VKD3D_DATA_DOUBLE; - case 'f': - return VKD3D_DATA_FLOAT; - case 'i': - return VKD3D_DATA_INT; - case 'u': - return VKD3D_DATA_UINT; - case 'O': - return VKD3D_DATA_OPAQUE; - case 'R': - return VKD3D_DATA_RESOURCE; - case 'S': - return VKD3D_DATA_SAMPLER; - case 'U': - return VKD3D_DATA_UAV; - default: - ERR("Invalid data type '%c'.\n", t); - return VKD3D_DATA_FLOAT; - } -} - -static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) -{ - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - - shader_instruction_array_destroy(&parser->instructions); - free_shader_desc(&parser->shader_desc); - vkd3d_free(sm4); -} - -static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx) -{ - if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) - { - struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&priv->p, 1); - - if (!(reg_idx->rel_addr = rel_addr)) - { - ERR("Failed to get src param for relative addressing.\n"); - return false; - } - - if (addressing & VKD3D_SM4_ADDRESSING_OFFSET) - reg_idx->offset = *(*ptr)++; - else - reg_idx->offset = 0; - shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr); - } - else - { - reg_idx->rel_addr = NULL; - reg_idx->offset = *(*ptr)++; - } - - return true; -} - -static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_type) -{ - switch (register_type) - { - case VKD3D_SM4_RT_SAMPLER: - case VKD3D_SM4_RT_RESOURCE: - case VKD3D_SM4_RT_CONSTBUFFER: - case VKD3D_SM5_RT_UAV: - 
return true; - - default: - return false; - } -} - -static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, - enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) -{ - enum vkd3d_sm4_register_precision precision; - enum vkd3d_sm4_register_type register_type; - enum vkd3d_sm4_extended_operand_type type; - enum vkd3d_sm4_register_modifier m; - uint32_t token, order, extended; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - token = *(*ptr)++; - - register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; - if (register_type >= ARRAY_SIZE(register_type_table) - || register_type_table[register_type] == VKD3DSPR_INVALID) - { - FIXME("Unhandled register type %#x.\n", register_type); - param->type = VKD3DSPR_TEMP; - } - else - { - param->type = register_type_table[register_type]; - } - param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; - param->non_uniform = false; - param->data_type = data_type; - - *modifier = VKD3DSPSM_NONE; - if (token & VKD3D_SM4_EXTENDED_OPERAND) - { - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - extended = *(*ptr)++; - - if (extended & VKD3D_SM4_EXTENDED_OPERAND) - { - FIXME("Skipping second-order extended operand.\n"); - *ptr += *ptr < end; - } - - type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK; - if (type == VKD3D_SM4_EXTENDED_OPERAND_MODIFIER) - { - m = (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT; - switch (m) - { - case VKD3D_SM4_REGISTER_MODIFIER_NEGATE: - *modifier = VKD3DSPSM_NEG; - break; - - case VKD3D_SM4_REGISTER_MODIFIER_ABS: - *modifier = VKD3DSPSM_ABS; - break; - - case VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE: - *modifier = VKD3DSPSM_ABSNEG; - break; - - default: - FIXME("Unhandled register modifier %#x.\n", m); - /* fall-through */ - case 
VKD3D_SM4_REGISTER_MODIFIER_NONE: - break; - } - - precision = (extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT; - if (precision >= ARRAY_SIZE(register_precision_table) - || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID) - { - FIXME("Unhandled register precision %#x.\n", precision); - param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID; - } - else - { - param->precision = register_precision_table[precision]; - } - - if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK) - param->non_uniform = true; - - extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK - | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK - | VKD3D_SM4_EXTENDED_OPERAND); - if (extended) - FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended); - } - else if (type) - { - FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type); - } - } - - order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT; - - if (order < 1) - { - param->idx[0].offset = ~0u; - param->idx[0].rel_addr = NULL; - } - else - { - DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> VKD3D_SM4_ADDRESSING_SHIFT0; - if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[0]))) - { - ERR("Failed to read register index.\n"); - return false; - } - } - - if (order < 2) - { - param->idx[1].offset = ~0u; - param->idx[1].rel_addr = NULL; - } - else - { - DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1; - if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, ¶m->idx[1]))) - { - ERR("Failed to read register index.\n"); - return false; - } - } - - if (order < 3) - { - param->idx[2].offset = ~0u; - param->idx[2].rel_addr = NULL; - } - else - { - DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2; - if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, 
¶m->idx[2]))) - { - ERR("Failed to read register index.\n"); - return false; - } - } - - if (order > 3) - { - WARN("Unhandled order %u.\n", order); - return false; - } - - if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64) - { - enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT; - unsigned int dword_count; - - switch (dimension) - { - case VKD3D_SM4_DIMENSION_SCALAR: - param->immconst_type = VKD3D_IMMCONST_SCALAR; - dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64); - if (end - *ptr < dword_count) - { - WARN("Invalid ptr %p, end %p.\n", *ptr, end); - return false; - } - memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD)); - *ptr += dword_count; - break; - - case VKD3D_SM4_DIMENSION_VEC4: - param->immconst_type = VKD3D_IMMCONST_VEC4; - if (end - *ptr < VKD3D_VEC4_SIZE) - { - WARN("Invalid ptr %p, end %p.\n", *ptr, end); - return false; - } - memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD)); - *ptr += 4; - break; - - default: - FIXME("Unhandled dimension %#x.\n", dimension); - break; - } - } - else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type)) - { - /* SM5.1 places a symbol identifier in idx[0] and moves - * other values up one slot. Normalize to SM5.1. 
*/ - param->idx[2] = param->idx[1]; - param->idx[1] = param->idx[0]; - } - - map_register(priv, param); - - return true; -} - -static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) -{ - switch (reg->type) - { - case VKD3DSPR_COVERAGE: - case VKD3DSPR_DEPTHOUT: - case VKD3DSPR_DEPTHOUTGE: - case VKD3DSPR_DEPTHOUTLE: - case VKD3DSPR_GSINSTID: - case VKD3DSPR_LOCALTHREADINDEX: - case VKD3DSPR_OUTPOINTID: - case VKD3DSPR_PRIMID: - case VKD3DSPR_SAMPLEMASK: - case VKD3DSPR_OUTSTENCILREF: - return true; - default: - return false; - } -} - -static uint32_t swizzle_from_sm4(uint32_t s) -{ - return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3); -} - -static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param) +void dxbc_writer_init(struct dxbc_writer *dxbc) { - DWORD token; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - token = **ptr; - - if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers)) - { - ERR("Failed to read parameter.\n"); - return false; - } - - if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64) - { - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - } - else - { - enum vkd3d_sm4_swizzle_type swizzle_type = - (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT; - - switch (swizzle_type) - { - case VKD3D_SM4_SWIZZLE_NONE: - if (shader_sm4_is_scalar_register(&src_param->reg)) - src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - else - src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; - break; - - case VKD3D_SM4_SWIZZLE_SCALAR: - src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT; - src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101; - break; - - case VKD3D_SM4_SWIZZLE_VEC4: - 
src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT); - break; - - default: - FIXME("Unhandled swizzle type %#x.\n", swizzle_type); - break; - } - } - - return true; + memset(dxbc, 0, sizeof(*dxbc)); } -static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, - const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param) +void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void *data, size_t size) { - enum vkd3d_shader_src_modifier modifier; - DWORD token; - - if (*ptr >= end) - { - WARN("Invalid ptr %p >= end %p.\n", *ptr, end); - return false; - } - token = **ptr; - - if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier)) - { - ERR("Failed to read parameter.\n"); - return false; - } - - if (modifier != VKD3DSPSM_NONE) - { - ERR("Invalid source modifier %#x on destination register.\n", modifier); - return false; - } + struct vkd3d_shader_dxbc_section_desc *section; - dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT; - if (data_type == VKD3D_DATA_DOUBLE) - dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask); - /* Scalar registers are declared with no write mask in shader bytecode. 
*/ - if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg)) - dst_param->write_mask = VKD3DSP_WRITEMASK_0; - dst_param->modifiers = 0; - dst_param->shift = 0; + assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); - return true; + section = &dxbc->sections[dxbc->section_count++]; + section->tag = tag; + section->data.code = data; + section->data.size = size; } -static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins) +int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, + struct vkd3d_shader_code *dxbc, char **messages) { - enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK; - - switch (modifier_type) - { - case VKD3D_SM4_MODIFIER_AOFFIMMI: - { - static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER - | VKD3D_SM4_MODIFIER_MASK - | VKD3D_SM4_AOFFIMMI_U_MASK - | VKD3D_SM4_AOFFIMMI_V_MASK - | VKD3D_SM4_AOFFIMMI_W_MASK; - - /* Bit fields are used for sign extension. 
*/ - struct - { - int u : 4; - int v : 4; - int w : 4; - } aoffimmi; - - if (modifier & ~recognized_bits) - FIXME("Unhandled instruction modifier %#x.\n", modifier); - - aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT; - aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT; - aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT; - ins->texel_offset.u = aoffimmi.u; - ins->texel_offset.v = aoffimmi.v; - ins->texel_offset.w = aoffimmi.w; - break; - } - - case VKD3D_SM5_MODIFIER_DATA_TYPE: - { - DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT; - unsigned int i; - - for (i = 0; i < VKD3D_VEC4_SIZE; i++) - { - enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); - - if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) - { - FIXME("Unhandled data type %#x.\n", data_type); - ins->resource_data_type[i] = VKD3D_DATA_FLOAT; - } - else - { - ins->resource_data_type[i] = data_type_table[data_type]; - } - } - break; - } - - case VKD3D_SM5_MODIFIER_RESOURCE_TYPE: - { - enum vkd3d_sm4_resource_type resource_type - = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT; - - if (resource_type == VKD3D_SM4_RESOURCE_RAW_BUFFER) - ins->raw = true; - else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER) - ins->structured = true; - - if (resource_type < ARRAY_SIZE(resource_type_table)) - ins->resource_type = resource_type_table[resource_type]; - else - { - FIXME("Unhandled resource type %#x.\n", resource_type); - ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; - } - - ins->resource_stride - = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT; - break; - } + size_t size_position, offsets_position, checksum_position, i; + struct vkd3d_bytecode_buffer buffer = {0}; + uint32_t checksum[4]; - default: - 
FIXME("Unhandled instruction modifier %#x.\n", modifier); - } -} + TRACE("section_count %zu, sections %p, dxbc %p, messages %p.\n", section_count, sections, dxbc, messages); -static void shader_sm4_read_instruction(struct vkd3d_shader_parser *parser, struct vkd3d_shader_instruction *ins) -{ - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - const struct vkd3d_sm4_opcode_info *opcode_info; - uint32_t opcode_token, opcode, previous_token; - struct vkd3d_shader_dst_param *dst_params; - struct vkd3d_shader_src_param *src_params; - const uint32_t **ptr = &parser->ptr; - unsigned int i, len; - size_t remaining; - const uint32_t *p; - DWORD precise; + if (messages) + *messages = NULL; - if (*ptr >= sm4->end) - { - WARN("End of byte-code, failed to read opcode.\n"); - goto fail; - } - remaining = sm4->end - *ptr; + put_u32(&buffer, TAG_DXBC); - ++parser->location.line; + checksum_position = bytecode_get_size(&buffer); + for (i = 0; i < 4; ++i) + put_u32(&buffer, 0); - opcode_token = *(*ptr)++; - opcode = opcode_token & VKD3D_SM4_OPCODE_MASK; + put_u32(&buffer, 1); /* version */ + size_position = put_u32(&buffer, 0); + put_u32(&buffer, section_count); - len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); - if (!len) - { - if (remaining < 2) - { - WARN("End of byte-code, failed to read length token.\n"); - goto fail; - } - len = **ptr; - } - if (!len || remaining < len) - { - WARN("Read invalid length %u (remaining %zu).\n", len, remaining); - goto fail; - } - --len; + offsets_position = bytecode_get_size(&buffer); + for (i = 0; i < section_count; ++i) + put_u32(&buffer, 0); - if (!(opcode_info = get_opcode_info(opcode))) + for (i = 0; i < section_count; ++i) { - FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); - ins->handler_idx = VKD3DSIH_INVALID; - *ptr += len; - return; - } - - ins->handler_idx = opcode_info->handler_idx; - ins->flags = 0; - ins->coissue = false; - 
ins->raw = false; - ins->structured = false; - ins->predicate = NULL; - ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); - ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); - ins->src = src_params = shader_parser_get_src_params(parser, ins->src_count); - if (!src_params && ins->src_count) - { - ERR("Failed to allocate src parameters.\n"); - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; - return; + set_u32(&buffer, offsets_position + i * sizeof(uint32_t), bytecode_align(&buffer)); + put_u32(&buffer, sections[i].tag); + put_u32(&buffer, sections[i].data.size); + bytecode_put_bytes(&buffer, sections[i].data.code, sections[i].data.size); } - ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; - ins->resource_stride = 0; - ins->resource_data_type[0] = VKD3D_DATA_FLOAT; - ins->resource_data_type[1] = VKD3D_DATA_FLOAT; - ins->resource_data_type[2] = VKD3D_DATA_FLOAT; - ins->resource_data_type[3] = VKD3D_DATA_FLOAT; - memset(&ins->texel_offset, 0, sizeof(ins->texel_offset)); + set_u32(&buffer, size_position, bytecode_get_size(&buffer)); - p = *ptr; - *ptr += len; + vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); + for (i = 0; i < 4; ++i) + set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); - if (opcode_info->read_opcode_func) - { - ins->dst = NULL; - ins->dst_count = 0; - opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4); - } - else + if (!buffer.status) { - enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE; - - previous_token = opcode_token; - while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr) - shader_sm4_read_instruction_modifier(previous_token = *p++, ins); - - ins->flags = (opcode_token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) - { - ins->flags &= 
~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE; - instruction_dst_modifier = VKD3DSPDM_SATURATE; - } - precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT; - ins->flags |= precise << VKD3DSI_PRECISE_SHIFT; - - ins->dst = dst_params = shader_parser_get_dst_params(parser, ins->dst_count); - if (!dst_params && ins->dst_count) - { - ERR("Failed to allocate dst parameters.\n"); - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - for (i = 0; i < ins->dst_count; ++i) - { - if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), - &dst_params[i]))) - { - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - dst_params[i].modifiers |= instruction_dst_modifier; - } - - for (i = 0; i < ins->src_count; ++i) - { - if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), - &src_params[i]))) - { - ins->handler_idx = VKD3DSIH_INVALID; - return; - } - } + dxbc->code = buffer.data; + dxbc->size = buffer.size; } - - return; - -fail: - *ptr = sm4->end; - ins->handler_idx = VKD3DSIH_INVALID; - return; -} - -static bool shader_sm4_is_end(struct vkd3d_shader_parser *parser) -{ - struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); - - return parser->ptr == sm4->end; + return buffer.status; } -static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = -{ - .parser_destroy = shader_sm4_destroy, -}; - -static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, - size_t byte_code_size, const char *source_name, const struct vkd3d_shader_signature *output_signature, - struct vkd3d_shader_message_context *message_context) +int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *out) { - struct vkd3d_shader_version version; - uint32_t version_token, token_count; - unsigned int i; - - if (byte_code_size / sizeof(*byte_code) < 2) - { - WARN("Invalid 
byte code size %lu.\n", (long)byte_code_size); - return false; - } - - version_token = byte_code[0]; - TRACE("Version: 0x%08x.\n", version_token); - token_count = byte_code[1]; - TRACE("Token count: %u.\n", token_count); - - if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count) - { - WARN("Invalid token count %u.\n", token_count); - return false; - } - - sm4->start = &byte_code[2]; - sm4->end = &byte_code[token_count]; - - switch (version_token >> 16) - { - case VKD3D_SM4_PS: - version.type = VKD3D_SHADER_TYPE_PIXEL; - break; - - case VKD3D_SM4_VS: - version.type = VKD3D_SHADER_TYPE_VERTEX; - break; - - case VKD3D_SM4_GS: - version.type = VKD3D_SHADER_TYPE_GEOMETRY; - break; - - case VKD3D_SM5_HS: - version.type = VKD3D_SHADER_TYPE_HULL; - break; - - case VKD3D_SM5_DS: - version.type = VKD3D_SHADER_TYPE_DOMAIN; - break; - - case VKD3D_SM5_CS: - version.type = VKD3D_SHADER_TYPE_COMPUTE; - break; - - default: - FIXME("Unrecognised shader type %#x.\n", version_token >> 16); - } - version.major = VKD3D_SM4_VERSION_MAJOR(version_token); - version.minor = VKD3D_SM4_VERSION_MINOR(version_token); - - /* Estimate instruction count to avoid reallocation in most shaders. 
*/ - if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops, - token_count / 7u + 20)) - return false; - sm4->p.ptr = sm4->start; - - memset(sm4->output_map, 0xff, sizeof(sm4->output_map)); - for (i = 0; i < output_signature->element_count; ++i) - { - struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; - - if (version.type == VKD3D_SHADER_TYPE_PIXEL - && ascii_strcasecmp(e->semantic_name, "SV_Target")) - continue; - if (e->register_index >= ARRAY_SIZE(sm4->output_map)) - { - WARN("Invalid output index %u.\n", e->register_index); - continue; - } - - sm4->output_map[e->register_index] = e->semantic_index; - } - - return true; + return vkd3d_shader_serialize_dxbc(dxbc->section_count, dxbc->sections, out, NULL); } static bool require_space(size_t offset, size_t count, size_t size, size_t data_size) @@ -1928,12 +322,12 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, } static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *section, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *s) + struct vkd3d_shader_message_context *message_context, struct shader_signature *s) { bool has_stream_index, has_min_precision; - struct vkd3d_shader_signature_element *e; const char *data = section->data.code; uint32_t count, header_size; + struct signature_element *e; const char *ptr = data; unsigned int i; @@ -1979,6 +373,8 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s { uint32_t name_offset, mask; + e[i].sort_index = i; + if (has_stream_index) read_dword(&ptr, &e[i].stream_index); else @@ -1995,6 +391,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s read_dword(&ptr, &e[i].sysval_semantic); read_dword(&ptr, &e[i].component_type); read_dword(&ptr, &e[i].register_index); + e[i].register_count = 1; read_dword(&ptr, &mask); e[i].mask = mask & 0xff; e[i].used_mask = 
(mask >> 8) & 0xff; @@ -2029,7 +426,7 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, struct vkd3d_shader_message_context *message_context, void *ctx) { - struct vkd3d_shader_signature *is = ctx; + struct shader_signature *is = ctx; if (section->tag != TAG_ISGN) return VKD3D_OK; @@ -2037,13 +434,13 @@ static int isgn_handler(const struct vkd3d_shader_dxbc_section_desc *section, if (is->elements) { FIXME("Multiple input signatures.\n"); - vkd3d_shader_free_shader_signature(is); + shader_signature_cleanup(is); } return shader_parse_signature(section, message_context, is); } int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature) + struct vkd3d_shader_message_context *message_context, struct shader_signature *signature) { int ret; @@ -2122,12 +519,12 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, void free_shader_desc(struct vkd3d_shader_desc *desc) { - vkd3d_shader_free_shader_signature(&desc->input_signature); - vkd3d_shader_free_shader_signature(&desc->output_signature); - vkd3d_shader_free_shader_signature(&desc->patch_constant_signature); + shader_signature_cleanup(&desc->input_signature); + shader_signature_cleanup(&desc->output_signature); + shader_signature_cleanup(&desc->patch_constant_signature); } -static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, +int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) { int ret; @@ -2151,66 +548,6 @@ static int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, return ret; } -int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context 
*message_context, struct vkd3d_shader_parser **parser) -{ - struct vkd3d_shader_instruction_array *instructions; - struct vkd3d_shader_desc *shader_desc; - struct vkd3d_shader_instruction *ins; - struct vkd3d_shader_sm4_parser *sm4; - int ret; - - if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) - { - ERR("Failed to allocate parser.\n"); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - - shader_desc = &sm4->p.shader_desc; - if ((ret = shader_extract_from_dxbc(&compile_info->source, - message_context, compile_info->source_name, shader_desc)) < 0) - { - WARN("Failed to extract shader, vkd3d result %d.\n", ret); - vkd3d_free(sm4); - return ret; - } - - if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, - compile_info->source_name, &shader_desc->output_signature, message_context)) - { - WARN("Failed to initialise shader parser.\n"); - free_shader_desc(shader_desc); - vkd3d_free(sm4); - return VKD3D_ERROR_INVALID_ARGUMENT; - } - - instructions = &sm4->p.instructions; - while (!shader_sm4_is_end(&sm4->p)) - { - if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) - { - ERR("Failed to allocate instructions.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - shader_sm4_destroy(&sm4->p); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - ins = &instructions->elements[instructions->count]; - shader_sm4_read_instruction(&sm4->p, ins); - - if (ins->handler_idx == VKD3DSIH_INVALID) - { - WARN("Encountered unrecognized or invalid instruction.\n"); - shader_sm4_destroy(&sm4->p); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - ++instructions->count; - } - - *parser = &sm4->p; - - return VKD3D_OK; -} - /* root signatures */ #define VKD3D_ROOT_SIGNATURE_1_0_ROOT_DESCRIPTOR_FLAGS VKD3D_SHADER_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE @@ -2862,7 +1199,7 @@ static int shader_write_root_parameters(struct root_signature_writer_context *co size_t parameters_position; unsigned int i; - parameters_position = 
bytecode_get_size(buffer); + parameters_position = bytecode_align(buffer); for (i = 0; i < parameter_count; ++i) { put_u32(buffer, versioned_root_signature_get_parameter_type(desc, i)); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 64d6e87065b..ba5bcfbfaf0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -112,8 +112,12 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) void hlsl_free_var(struct hlsl_ir_var *decl) { + unsigned int k; + vkd3d_free((void *)decl->name); hlsl_cleanup_semantic(&decl->semantic); + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + vkd3d_free((void *)decl->objects_usage[k]); vkd3d_free(decl); } @@ -126,7 +130,7 @@ bool hlsl_type_is_row_major(const struct hlsl_type *type) unsigned int hlsl_type_minor_size(const struct hlsl_type *type) { - if (type->type != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) + if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) return type->dimx; else return type->dimy; @@ -134,7 +138,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type) unsigned int hlsl_type_major_size(const struct hlsl_type *type) { - if (type->type != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) + if (type->class != HLSL_CLASS_MATRIX || hlsl_type_is_row_major(type)) return type->dimy; else return type->dimx; @@ -142,7 +146,7 @@ unsigned int hlsl_type_major_size(const struct hlsl_type *type) unsigned int hlsl_type_element_count(const struct hlsl_type *type) { - switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: return type->dimx; @@ -157,16 +161,26 @@ unsigned int hlsl_type_element_count(const struct hlsl_type *type) } } -static unsigned int get_array_size(const struct hlsl_type *type) +const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_ARRAY) + return 
hlsl_get_multiarray_element_type(type->e.array.type); + return type; +} + +unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type) { - if (type->type == HLSL_CLASS_ARRAY) - return get_array_size(type->e.array.type) * type->e.array.elements_count; + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_get_multiarray_size(type->e.array.type) * type->e.array.elements_count; return 1; } bool hlsl_type_is_resource(const struct hlsl_type *type) { - if (type->type == HLSL_CLASS_OBJECT) + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_type_is_resource(type->e.array.type); + + if (type->class == HLSL_CLASS_OBJECT) { switch (type->base_type) { @@ -183,10 +197,13 @@ bool hlsl_type_is_resource(const struct hlsl_type *type) enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) { - if (type->type <= HLSL_CLASS_LAST_NUMERIC) + if (type->class <= HLSL_CLASS_LAST_NUMERIC) return HLSL_REGSET_NUMERIC; - if (type->type == HLSL_CLASS_OBJECT) + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_type_get_regset(type->e.array.type); + + if (type->class == HLSL_CLASS_OBJECT) { switch (type->base_type) { @@ -203,8 +220,6 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) vkd3d_unreachable(); } } - else if (type->type == HLSL_CLASS_ARRAY) - return hlsl_type_get_regset(type->e.array.type); vkd3d_unreachable(); } @@ -216,7 +231,8 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int * (b) the type would cross a vec4 boundary; i.e. a vec3 and a * vec1 can be packed together, but not a vec3 and a vec2. 
*/ - if (type->type > HLSL_CLASS_LAST_NUMERIC || (offset & 3) + type->reg_size[HLSL_REGSET_NUMERIC] > 4) + if (type->class == HLSL_CLASS_STRUCT || type->class == HLSL_CLASS_ARRAY + || (offset & 3) + type->reg_size[HLSL_REGSET_NUMERIC] > 4) return align(offset, 4); return offset; } @@ -229,7 +245,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type for (k = 0; k <= HLSL_REGSET_LAST; ++k) type->reg_size[k] = 0; - switch (type->type) + switch (type->class) { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: @@ -278,7 +294,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type type->reg_size[k] += field->type->reg_size[k]; } - type->dimx += field->type->dimx * field->type->dimy * get_array_size(field->type); + type->dimx += field->type->dimx * field->type->dimy * hlsl_get_multiarray_size(field->type); } break; } @@ -317,7 +333,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e vkd3d_free(type); return NULL; } - type->type = type_class; + type->class = type_class; type->base_type = base_type; type->dimx = dimx; type->dimy = dimy; @@ -330,7 +346,7 @@ static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, e static bool type_is_single_component(const struct hlsl_type *type) { - return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_OBJECT; + return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_OBJECT; } /* Given a type and a component index, this function moves one step through the path required to @@ -349,7 +365,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, assert(!type_is_single_component(type)); assert(index < hlsl_type_component_count(type)); - switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: assert(index < type->dimx); @@ -427,7 +443,7 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl return true; } - if (!(deref->path = 
hlsl_alloc(ctx, sizeof(*deref->path) * deref->path_len))) + if (!(deref->path = hlsl_calloc(ctx, deref->path_len, sizeof(*deref->path)))) { deref->var = NULL; deref->path_len = 0; @@ -437,6 +453,71 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl return true; } +bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain) +{ + struct hlsl_ir_index *index; + struct hlsl_ir_load *load; + unsigned int chain_len, i; + struct hlsl_ir_node *ptr; + + deref->path = NULL; + deref->path_len = 0; + deref->offset.node = NULL; + + assert(chain); + if (chain->type == HLSL_IR_INDEX) + assert(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); + + /* Find the length of the index chain */ + chain_len = 0; + ptr = chain; + while (ptr->type == HLSL_IR_INDEX) + { + index = hlsl_ir_index(ptr); + + chain_len++; + ptr = index->val.node; + } + + if (ptr->type != HLSL_IR_LOAD) + { + hlsl_error(ctx, &chain->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Invalid l-value."); + return false; + } + load = hlsl_ir_load(ptr); + + if (!init_deref(ctx, deref, load->src.var, load->src.path_len + chain_len)) + return false; + + for (i = 0; i < load->src.path_len; ++i) + hlsl_src_from_node(&deref->path[i], load->src.path[i].node); + + chain_len = 0; + ptr = chain; + while (ptr->type == HLSL_IR_INDEX) + { + unsigned int p = deref->path_len - 1 - chain_len; + + index = hlsl_ir_index(ptr); + if (hlsl_index_is_noncontiguous(index)) + { + hlsl_src_from_node(&deref->path[p], deref->path[p + 1].node); + hlsl_src_remove(&deref->path[p + 1]); + hlsl_src_from_node(&deref->path[p + 1], index->idx.node); + } + else + { + hlsl_src_from_node(&deref->path[p], index->idx.node); + } + + chain_len++; + ptr = index->val.node; + } + assert(deref->path_len == load->src.path_len + chain_len); + + return true; +} + struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) { struct hlsl_type *type; @@ 
-459,9 +540,9 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl { unsigned int path_len, path_index, deref_path_len, i; struct hlsl_type *path_type; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c; - list_init(&block->instrs); + hlsl_block_init(block); path_len = 0; path_type = hlsl_deref_get_type(ctx, prefix); @@ -487,12 +568,12 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl if (!(c = hlsl_new_uint_constant(ctx, next_index, loc))) { - hlsl_free_instr_list(&block->instrs); + hlsl_block_cleanup(block); return false; } - list_add_tail(&block->instrs, &c->node.entry); + hlsl_block_add_instr(block, c); - hlsl_src_from_node(&deref->path[deref_path_len++], &c->node); + hlsl_src_from_node(&deref->path[deref_path_len++], c); } assert(deref_path_len == deref->path_len); @@ -505,7 +586,7 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co { assert(idx); - switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: return hlsl_get_scalar_type(ctx, type->base_type); @@ -523,8 +604,8 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co { struct hlsl_ir_constant *c = hlsl_ir_constant(idx); - assert(c->value[0].u < type->e.record.field_count); - return type->e.record.fields[c->value[0].u].type; + assert(c->value.u[0].u < type->e.record.field_count); + return type->e.record.fields[c->value.u[0].u].type; } default: @@ -539,7 +620,7 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; - type->type = HLSL_CLASS_ARRAY; + type->class = HLSL_CLASS_ARRAY; type->modifiers = basic_type->modifiers; type->e.array.elements_count = array_size; type->e.array.type = basic_type; @@ -559,7 +640,7 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; - type->type = 
HLSL_CLASS_STRUCT; + type->class = HLSL_CLASS_STRUCT; type->base_type = HLSL_TYPE_VOID; type->name = name; type->dimy = 1; @@ -579,7 +660,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) return NULL; - type->type = HLSL_CLASS_OBJECT; + type->class = HLSL_CLASS_OBJECT; type->base_type = HLSL_TYPE_TEXTURE; type->dimx = 4; type->dimy = 1; @@ -597,7 +678,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim if (!(type = vkd3d_calloc(1, sizeof(*type)))) return NULL; - type->type = HLSL_CLASS_OBJECT; + type->class = HLSL_CLASS_OBJECT; type->base_type = HLSL_TYPE_UAV; type->dimx = format->dimx; type->dimy = 1; @@ -614,6 +695,8 @@ static const char * get_case_insensitive_typename(const char *name) { "dword", "float", + "matrix", + "vector", }; unsigned int i; @@ -677,7 +760,7 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha unsigned int hlsl_type_component_count(const struct hlsl_type *type) { - switch (type->type) + switch (type->class) { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: @@ -709,7 +792,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 if (t1 == t2) return true; - if (t1->type != t2->type) + if (t1->class != t2->class) return false; if (t1->base_type != t2->base_type) return false; @@ -729,7 +812,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 return false; if (t1->dimy != t2->dimy) return false; - if (t1->type == HLSL_CLASS_STRUCT) + if (t1->class == HLSL_CLASS_STRUCT) { size_t i; @@ -748,7 +831,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 return false; } } - if (t1->type == HLSL_CLASS_ARRAY) + if (t1->class == HLSL_CLASS_ARRAY) return t1->e.array.elements_count == t2->e.array.elements_count && hlsl_types_are_equal(t1->e.array.type, t2->e.array.type); @@ -772,7 +855,7 @@ struct hlsl_type 
*hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, return NULL; } } - type->type = old->type; + type->class = old->class; type->base_type = old->base_type; type->dimx = old->dimx; type->dimy = old->dimy; @@ -781,7 +864,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, type->modifiers |= default_majority; type->sampler_dim = old->sampler_dim; type->is_minimum_precision = old->is_minimum_precision; - switch (old->type) + switch (old->class) { case HLSL_CLASS_ARRAY: if (!(type->e.array.type = hlsl_type_clone(ctx, old->e.array.type, default_majority, modifiers))) @@ -799,7 +882,7 @@ struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, type->e.record.field_count = field_count; - if (!(type->e.record.fields = hlsl_alloc(ctx, field_count * sizeof(*type->e.record.fields)))) + if (!(type->e.record.fields = hlsl_calloc(ctx, field_count, sizeof(*type->e.record.fields)))) { vkd3d_free((void *)type->name); vkd3d_free(type); @@ -848,40 +931,58 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type) return true; } -struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, +struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *cast; - cast = hlsl_new_unary_expr(ctx, HLSL_OP1_CAST, node, *loc); + cast = hlsl_new_unary_expr(ctx, HLSL_OP1_CAST, node, loc); if (cast) cast->data_type = type; - return hlsl_ir_expr(cast); + return cast; } -struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) +struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) { /* Use a cast to the same type as a makeshift identity expression. 
*/ return hlsl_new_cast(ctx, node, node->data_type, &node->loc); } struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, - const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers, + const struct vkd3d_shader_location *loc, const struct hlsl_semantic *semantic, unsigned int modifiers, const struct hlsl_reg_reservation *reg_reservation) { struct hlsl_ir_var *var; + unsigned int k; if (!(var = hlsl_alloc(ctx, sizeof(*var)))) return NULL; var->name = name; var->data_type = type; - var->loc = loc; + var->loc = *loc; if (semantic) var->semantic = *semantic; var->storage_modifiers = modifiers; if (reg_reservation) var->reg_reservation = *reg_reservation; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + unsigned int i, obj_count = type->reg_size[k]; + + if (obj_count == 0) + continue; + + if (!(var->objects_usage[k] = hlsl_calloc(ctx, obj_count, sizeof(*var->objects_usage[0])))) + { + for (i = 0; i < k; ++i) + vkd3d_free(var->objects_usage[i]); + vkd3d_free(var); + return NULL; + } + } + return var; } @@ -901,7 +1002,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem hlsl_release_string_buffer(ctx, string); return NULL; } - var = hlsl_new_var(ctx, name, type, *loc, NULL, 0, NULL); + var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); hlsl_release_string_buffer(ctx, string); if (var) list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); @@ -910,7 +1011,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem static bool type_is_single_reg(const struct hlsl_type *type) { - return type->type == HLSL_CLASS_SCALAR || type->type == HLSL_CLASS_VECTOR; + return type->class == HLSL_CLASS_SCALAR || type->class == HLSL_CLASS_VECTOR; } bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other) @@ -964,7 +1065,7 @@ static void init_node(struct hlsl_ir_node *node, enum 
hlsl_ir_node_type type, list_init(&node->uses); } -struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs) +struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs) { struct hlsl_deref lhs_deref; @@ -972,7 +1073,7 @@ struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir return hlsl_new_store_index(ctx, &lhs_deref, NULL, rhs, 0, &rhs->loc); } -struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, +struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc) { struct hlsl_ir_store *store; @@ -1001,35 +1102,35 @@ struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hl writemask = (1 << rhs->data_type->dimx) - 1; store->writemask = writemask; - return store; + return &store->node; } -struct hlsl_ir_store *hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, +bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs) { struct hlsl_block comp_path_block; struct hlsl_ir_store *store; - list_init(&block->instrs); + hlsl_block_init(block); if (!(store = hlsl_alloc(ctx, sizeof(*store)))) - return NULL; + return false; init_node(&store->node, HLSL_IR_STORE, NULL, &rhs->loc); if (!init_deref_from_component_index(ctx, &comp_path_block, &store->lhs, lhs, comp, &rhs->loc)) { vkd3d_free(store); - return NULL; + return false; } - list_move_tail(&block->instrs, &comp_path_block.instrs); + hlsl_block_add_block(block, &comp_path_block); hlsl_src_from_node(&store->rhs, rhs); if (type_is_single_reg(rhs->data_type)) store->writemask = (1 << rhs->data_type->dimx) - 1; - list_add_tail(&block->instrs, &store->node.entry); 
+ hlsl_block_add_instr(block, &store->node); - return store; + return true; } struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, @@ -1045,66 +1146,54 @@ struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function return &call->node; } -struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, - const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, + const struct hlsl_constant_value *value, const struct vkd3d_shader_location *loc) { struct hlsl_ir_constant *c; - assert(type->type <= HLSL_CLASS_VECTOR); + assert(type->class <= HLSL_CLASS_VECTOR); if (!(c = hlsl_alloc(ctx, sizeof(*c)))) return NULL; init_node(&c->node, HLSL_IR_CONSTANT, type, loc); + c->value = *value; - return c; + return &c->node; } -struct hlsl_ir_constant *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; - - if ((c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), loc))) - c->value[0].u = b ? ~0u : 0; + struct hlsl_constant_value value; - return c; + value.u[0].u = b ? 
~0u : 0; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &value, loc); } -struct hlsl_ir_constant *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, +struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; + struct hlsl_constant_value value; - if ((c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) - c->value[0].f = f; - - return c; + value.u[0].f = f; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), &value, loc); } -struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n, - const struct vkd3d_shader_location *loc) +struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; - - c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc); - - if (c) - c->value[0].i = n; + struct hlsl_constant_value value; - return c; + value.u[0].i = n; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &value, loc); } -struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, +struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; - - c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); + struct hlsl_constant_value value; - if (c) - c->value[0].u = n; - - return c; + value.u[0].u = n; + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &value, loc); } struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, @@ -1124,11 +1213,11 @@ struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op } struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, - struct hlsl_ir_node *arg, struct vkd3d_shader_location loc) + struct 
hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg}; - return hlsl_new_expr(ctx, op, operands, arg->data_type, &loc); + return hlsl_new_expr(ctx, op, operands, arg->data_type, loc); } struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, @@ -1140,17 +1229,21 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } -struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc) +struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, + struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) { struct hlsl_ir_if *iff; if (!(iff = hlsl_alloc(ctx, sizeof(*iff)))) return NULL; - init_node(&iff->node, HLSL_IR_IF, NULL, &loc); + init_node(&iff->node, HLSL_IR_IF, NULL, loc); hlsl_src_from_node(&iff->condition, condition); - list_init(&iff->then_instrs.instrs); - list_init(&iff->else_instrs.instrs); - return iff; + hlsl_block_init(&iff->then_block); + hlsl_block_add_block(&iff->then_block, then_block); + hlsl_block_init(&iff->else_block); + if (else_block) + hlsl_block_add_block(&iff->else_block, else_block); + return &iff->node; } struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, @@ -1183,23 +1276,36 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl return load; } +struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc) +{ + /* This deref can only exists temporarily because it is not the real owner of its members. 
*/ + struct hlsl_deref tmp_deref; + + assert(deref->path_len >= 1); + + tmp_deref = *deref; + tmp_deref.path_len = deref->path_len - 1; + return hlsl_new_load_index(ctx, &tmp_deref, NULL, loc); +} + struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - struct vkd3d_shader_location loc) + const struct vkd3d_shader_location *loc) { struct hlsl_deref var_deref; hlsl_init_simple_deref_from_var(&var_deref, var); - return hlsl_new_load_index(ctx, &var_deref, NULL, &loc); + return hlsl_new_load_index(ctx, &var_deref, NULL, loc); } -struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, +struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc) { struct hlsl_type *type, *comp_type; struct hlsl_block comp_path_block; struct hlsl_ir_load *load; - list_init(&block->instrs); + hlsl_block_init(block); if (!(load = hlsl_alloc(ctx, sizeof(*load)))) return NULL; @@ -1213,14 +1319,14 @@ struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_b vkd3d_free(load); return NULL; } - list_move_tail(&block->instrs, &comp_path_block.instrs); + hlsl_block_add_block(block, &comp_path_block); - list_add_tail(&block->instrs, &load->node.entry); + hlsl_block_add_instr(block, &load->node); - return load; + return &load->node; } -struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, +struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_resource_load *load; @@ -1229,24 +1335,37 @@ struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, return NULL; init_node(&load->node, HLSL_IR_RESOURCE_LOAD, params->format, loc); load->load_type = params->type; - if (!hlsl_copy_deref(ctx, &load->resource, ¶ms->resource)) + + if 
(!hlsl_init_deref_from_index_chain(ctx, &load->resource, params->resource)) { vkd3d_free(load); return NULL; } - if (!hlsl_copy_deref(ctx, &load->sampler, ¶ms->sampler)) + + if (params->sampler) { - hlsl_cleanup_deref(&load->resource); - vkd3d_free(load); - return NULL; + if (!hlsl_init_deref_from_index_chain(ctx, &load->sampler, params->sampler)) + { + hlsl_cleanup_deref(&load->resource); + vkd3d_free(load); + return NULL; + } } + hlsl_src_from_node(&load->coords, params->coords); + hlsl_src_from_node(&load->sample_index, params->sample_index); hlsl_src_from_node(&load->texel_offset, params->texel_offset); hlsl_src_from_node(&load->lod, params->lod); - return load; + hlsl_src_from_node(&load->ddx, params->ddx); + hlsl_src_from_node(&load->ddy, params->ddy); + hlsl_src_from_node(&load->cmp, params->cmp); + load->sampling_dim = params->sampling_dim; + if (load->sampling_dim == HLSL_SAMPLER_DIM_GENERIC) + load->sampling_dim = hlsl_deref_get_type(ctx, &load->resource)->sampler_dim; + return &load->node; } -struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, +struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc) { struct hlsl_ir_resource_store *store; @@ -1257,10 +1376,10 @@ struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, con hlsl_copy_deref(ctx, &store->resource, resource); hlsl_src_from_node(&store->coords, coords); hlsl_src_from_node(&store->value, value); - return store; + return &store->node; } -struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, +struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) { struct hlsl_ir_swizzle *swizzle; @@ -1275,29 +1394,66 @@ struct hlsl_ir_swizzle 
*hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); hlsl_src_from_node(&swizzle->val, val); swizzle->swizzle = s; - return swizzle; + return &swizzle->node; +} + +bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) +{ + struct hlsl_type *type = index->val.node->data_type; + + return type->class == HLSL_CLASS_MATRIX && !hlsl_type_is_row_major(type); +} + +bool hlsl_index_is_resource_access(struct hlsl_ir_index *index) +{ + return index->val.node->data_type->class == HLSL_CLASS_OBJECT; +} + +struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *type = val->data_type; + struct hlsl_ir_index *index; + + if (!(index = hlsl_alloc(ctx, sizeof(*index)))) + return NULL; + + if (type->class == HLSL_CLASS_OBJECT) + type = type->e.resource_format; + else if (type->class == HLSL_CLASS_MATRIX) + type = hlsl_get_vector_type(ctx, type->base_type, type->dimx); + else + type = hlsl_get_element_type_from_path_index(ctx, type, idx); + + init_node(&index->node, HLSL_IR_INDEX, type, loc); + hlsl_src_from_node(&index->val, val); + hlsl_src_from_node(&index->idx, idx); + return &index->node; } -struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc) +struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, + const struct vkd3d_shader_location *loc) { struct hlsl_ir_jump *jump; if (!(jump = hlsl_alloc(ctx, sizeof(*jump)))) return NULL; - init_node(&jump->node, HLSL_IR_JUMP, NULL, &loc); + init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); jump->type = type; - return jump; + return &jump->node; } -struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc) +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, + struct hlsl_block *block, const struct vkd3d_shader_location *loc) 
{ struct hlsl_ir_loop *loop; if (!(loop = hlsl_alloc(ctx, sizeof(*loop)))) return NULL; - init_node(&loop->node, HLSL_IR_LOOP, NULL, &loc); - list_init(&loop->body.instrs); - return loop; + init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); + hlsl_block_init(&loop->body); + hlsl_block_add_block(&loop->body, block); + return &loop->node; } struct clone_instr_map @@ -1319,11 +1475,13 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_ir_node *src; struct hlsl_ir_node *dst; + hlsl_block_init(dst_block); + LIST_FOR_EACH_ENTRY(src, &src_block->instrs, struct hlsl_ir_node, entry) { if (!(dst = clone_instr(ctx, map, src))) { - hlsl_free_instr_list(&dst_block->instrs); + hlsl_block_cleanup(dst_block); return false; } list_add_tail(&dst_block->instrs, &dst->entry); @@ -1332,7 +1490,7 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, { if (!vkd3d_array_reserve((void **)&map->instrs, &map->capacity, map->count + 1, sizeof(*map->instrs))) { - hlsl_free_instr_list(&dst_block->instrs); + hlsl_block_cleanup(dst_block); return false; } @@ -1390,12 +1548,7 @@ static struct hlsl_ir_node *clone_call(struct hlsl_ctx *ctx, struct hlsl_ir_call static struct hlsl_ir_node *clone_constant(struct hlsl_ctx *ctx, struct hlsl_ir_constant *src) { - struct hlsl_ir_constant *dst; - - if (!(dst = hlsl_new_constant(ctx, src->node.data_type, &src->node.loc))) - return NULL; - memcpy(dst->value, src->value, sizeof(src->value)); - return &dst->node; + return hlsl_new_constant(ctx, src->node.data_type, &src->value, &src->node.loc); } static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_expr *src) @@ -1411,27 +1564,30 @@ static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_if *src) { - struct hlsl_ir_if *dst; + struct hlsl_block then_block, else_block; 
+ struct hlsl_ir_node *dst; - if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), src->node.loc))) + if (!clone_block(ctx, &then_block, &src->then_block, map)) + return NULL; + if (!clone_block(ctx, &else_block, &src->else_block, map)) + { + hlsl_block_cleanup(&then_block); return NULL; + } - if (!clone_block(ctx, &dst->then_instrs, &src->then_instrs, map) - || !clone_block(ctx, &dst->else_instrs, &src->else_instrs, map)) + if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), &then_block, &else_block, &src->node.loc))) { - hlsl_free_instr(&dst->node); + hlsl_block_cleanup(&then_block); + hlsl_block_cleanup(&else_block); return NULL; } - return &dst->node; + + return dst; } static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) { - struct hlsl_ir_jump *dst; - - if (!(dst = hlsl_new_jump(ctx, src->type, src->node.loc))) - return NULL; - return &dst->node; + return hlsl_new_jump(ctx, src->type, &src->node.loc); } static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) @@ -1452,16 +1608,18 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) { - struct hlsl_ir_loop *dst; + struct hlsl_ir_node *dst; + struct hlsl_block body; - if (!(dst = hlsl_new_loop(ctx, src->node.loc))) + if (!clone_block(ctx, &body, &src->body, map)) return NULL; - if (!clone_block(ctx, &dst->body, &src->body, map)) + + if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) { - hlsl_free_instr(&dst->node); + hlsl_block_cleanup(&body); return NULL; } - return &dst->node; + return dst; } static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, @@ -1486,7 +1644,12 @@ static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, } clone_src(map, &dst->coords, &src->coords); clone_src(map, &dst->lod, &src->lod); + 
clone_src(map, &dst->ddx, &src->ddx); + clone_src(map, &dst->ddy, &src->ddy); + clone_src(map, &dst->sample_index, &src->sample_index); + clone_src(map, &dst->cmp, &src->cmp); clone_src(map, &dst->texel_offset, &src->texel_offset); + dst->sampling_dim = src->sampling_dim; return &dst->node; } @@ -1529,12 +1692,19 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_swizzle *src) { - struct hlsl_ir_swizzle *dst; + return hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, + map_instr(map, src->val.node), &src->node.loc); +} - if (!(dst = hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, - map_instr(map, src->val.node), &src->node.loc))) +static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map, + struct hlsl_ir_index *src) +{ + struct hlsl_ir_node *dst; + + if (!(dst = hlsl_new_index(ctx, map_instr(map, src->val.node), map_instr(map, src->idx.node), + &src->node.loc))) return NULL; - return &dst->node; + return dst; } static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, @@ -1554,6 +1724,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_IF: return clone_if(ctx, map, hlsl_ir_if(instr)); + case HLSL_IR_INDEX: + return clone_index(ctx, map, hlsl_ir_index(instr)); + case HLSL_IR_JUMP: return clone_jump(ctx, hlsl_ir_jump(instr)); @@ -1593,13 +1766,12 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc) { + struct hlsl_ir_node *constant, *store; struct hlsl_ir_function_decl *decl; - struct hlsl_ir_constant *constant; - struct hlsl_ir_store *store; if (!(decl = hlsl_alloc(ctx, sizeof(*decl)))) return NULL; - list_init(&decl->body.instrs); + 
hlsl_block_init(&decl->body); decl->return_type = return_type; decl->parameters = *parameters; decl->loc = *loc; @@ -1620,17 +1792,17 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, if (!(constant = hlsl_new_bool_constant(ctx, false, loc))) return decl; - list_add_tail(&decl->body.instrs, &constant->node.entry); + hlsl_block_add_instr(&decl->body, constant); - if (!(store = hlsl_new_simple_store(ctx, decl->early_return_var, &constant->node))) + if (!(store = hlsl_new_simple_store(ctx, decl->early_return_var, constant))) return decl; - list_add_tail(&decl->body.instrs, &store->node.entry); + hlsl_block_add_instr(&decl->body, store); return decl; } struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, - const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc) + const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) { struct hlsl_buffer *buffer; @@ -1640,7 +1812,7 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type buffer->name = name; if (reservation) buffer->reservation = *reservation; - buffer->loc = loc; + buffer->loc = *loc; list_add_tail(&ctx->buffers, &buffer->entry); return buffer; } @@ -1698,10 +1870,10 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls { int r; - if ((r = vkd3d_u32_compare(t1->type, t2->type))) + if ((r = vkd3d_u32_compare(t1->class, t2->class))) { - if (!((t1->type == HLSL_CLASS_SCALAR && t2->type == HLSL_CLASS_VECTOR) - || (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_SCALAR))) + if (!((t1->class == HLSL_CLASS_SCALAR && t2->class == HLSL_CLASS_VECTOR) + || (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_SCALAR))) return r; } if ((r = vkd3d_u32_compare(t1->base_type, t2->base_type))) @@ -1718,7 +1890,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls return r; if ((r = 
vkd3d_u32_compare(t1->dimy, t2->dimy))) return r; - if (t1->type == HLSL_CLASS_STRUCT) + if (t1->class == HLSL_CLASS_STRUCT) { size_t i; @@ -1738,7 +1910,7 @@ static int compare_param_hlsl_types(const struct hlsl_type *t1, const struct hls } return 0; } - if (t1->type == HLSL_CLASS_ARRAY) + if (t1->class == HLSL_CLASS_ARRAY) { if ((r = vkd3d_u32_compare(t1->e.array.elements_count, t2->e.array.elements_count))) return r; @@ -1768,7 +1940,7 @@ static int compare_function_decl_rb(const void *key, const struct rb_entry *entr struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) { - struct vkd3d_string_buffer *string; + struct vkd3d_string_buffer *string, *inner_string; static const char *const base_types[] = { @@ -1789,7 +1961,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru return string; } - switch (type->type) + switch (type->class) { case HLSL_CLASS_SCALAR: assert(type->base_type < ARRAY_SIZE(base_types)); @@ -1808,10 +1980,9 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru case HLSL_CLASS_ARRAY: { - struct vkd3d_string_buffer *inner_string; const struct hlsl_type *t; - for (t = type; t->type == HLSL_CLASS_ARRAY; t = t->e.array.type) + for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) ; if ((inner_string = hlsl_type_to_string(ctx, t))) @@ -1820,7 +1991,7 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru hlsl_release_string_buffer(ctx, inner_string); } - for (t = type; t->type == HLSL_CLASS_ARRAY; t = t->e.array.type) + for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) { if (t->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) vkd3d_string_buffer_printf(string, "[]"); @@ -1860,13 +2031,26 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru assert(type->sampler_dim < ARRAY_SIZE(dimensions)); assert(type->e.resource_format->base_type < 
ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "Texture%s<%s%u>", dimensions[type->sampler_dim], - base_types[type->e.resource_format->base_type], type->e.resource_format->dimx); + vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); + if ((inner_string = hlsl_type_to_string(ctx, type->e.resource_format))) + { + vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); + hlsl_release_string_buffer(ctx, inner_string); + } return string; case HLSL_TYPE_UAV: - vkd3d_string_buffer_printf(string, "RWTexture%s<%s%u>", dimensions[type->sampler_dim], - base_types[type->e.resource_format->base_type], type->e.resource_format->dimx); + if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) + vkd3d_string_buffer_printf(string, "RWBuffer"); + else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); + else + vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); + if ((inner_string = hlsl_type_to_string(ctx, type->e.resource_format))) + { + vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); + hlsl_release_string_buffer(ctx, inner_string); + } return string; default: @@ -1943,6 +2127,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) "HLSL_IR_CONSTANT", "HLSL_IR_EXPR", "HLSL_IR_IF", + "HLSL_IR_INDEX", "HLSL_IR_LOAD", "HLSL_IR_LOOP", "HLSL_IR_JUMP", @@ -2107,7 +2292,7 @@ static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hl vkd3d_string_buffer_printf(buffer, "{"); for (x = 0; x < type->dimx; ++x) { - const union hlsl_constant_value *value = &constant->value[x]; + const union hlsl_constant_value_component *value = &constant->value.u[x]; switch (type->base_type) { @@ -2168,6 +2353,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_SIN] = "sin", [HLSL_OP1_SIN_REDUCED] = "sin_reduced", [HLSL_OP1_SQRT] = "sqrt", + [HLSL_OP1_TRUNC] = "trunc", [HLSL_OP2_ADD] = "+", 
[HLSL_OP2_BIT_AND] = "&", @@ -2214,9 +2400,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, vkd3d_string_buffer_printf(buffer, "if ("); dump_src(buffer, &if_node->condition); vkd3d_string_buffer_printf(buffer, ") {\n"); - dump_instr_list(ctx, buffer, &if_node->then_instrs.instrs); + dump_instr_list(ctx, buffer, &if_node->then_block.instrs); vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); - dump_instr_list(ctx, buffer, &if_node->else_instrs.instrs); + dump_instr_list(ctx, buffer, &if_node->else_block.instrs); vkd3d_string_buffer_printf(buffer, " %10s }", ""); } @@ -2255,7 +2441,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru { [HLSL_RESOURCE_LOAD] = "load_resource", [HLSL_RESOURCE_SAMPLE] = "sample", + [HLSL_RESOURCE_SAMPLE_CMP] = "sample_cmp", + [HLSL_RESOURCE_SAMPLE_CMP_LZ] = "sample_cmp_lz", [HLSL_RESOURCE_SAMPLE_LOD] = "sample_lod", + [HLSL_RESOURCE_SAMPLE_LOD_BIAS] = "sample_biased", + [HLSL_RESOURCE_SAMPLE_GRAD] = "sample_grad", [HLSL_RESOURCE_GATHER_RED] = "gather_red", [HLSL_RESOURCE_GATHER_GREEN] = "gather_green", [HLSL_RESOURCE_GATHER_BLUE] = "gather_blue", @@ -2269,6 +2459,11 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru dump_deref(buffer, &load->sampler); vkd3d_string_buffer_printf(buffer, ", coords = "); dump_src(buffer, &load->coords); + if (load->sample_index.node) + { + vkd3d_string_buffer_printf(buffer, ", sample index = "); + dump_src(buffer, &load->sample_index); + } if (load->texel_offset.node) { vkd3d_string_buffer_printf(buffer, ", offset = "); @@ -2279,6 +2474,21 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru vkd3d_string_buffer_printf(buffer, ", lod = "); dump_src(buffer, &load->lod); } + if (load->ddx.node) + { + vkd3d_string_buffer_printf(buffer, ", ddx = "); + dump_src(buffer, &load->ddx); + } + if (load->ddy.node) + { + vkd3d_string_buffer_printf(buffer, ", ddy = "); + 
dump_src(buffer, &load->ddy); + } + if (load->cmp.node) + { + vkd3d_string_buffer_printf(buffer, ", cmp = "); + dump_src(buffer, &load->cmp); + } vkd3d_string_buffer_printf(buffer, ")"); } @@ -2321,6 +2531,14 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls } } +static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_index *index) +{ + dump_src(buffer, &index->val); + vkd3d_string_buffer_printf(buffer, "[idx:"); + dump_src(buffer, &index->idx); + vkd3d_string_buffer_printf(buffer, "]"); +} + static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr) { if (instr->index) @@ -2348,6 +2566,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, dump_ir_if(ctx, buffer, hlsl_ir_if(instr)); break; + case HLSL_IR_INDEX: + dump_ir_index(buffer, hlsl_ir_index(instr)); + break; + case HLSL_IR_JUMP: dump_ir_jump(buffer, hlsl_ir_jump(instr)); break; @@ -2421,7 +2643,7 @@ void hlsl_free_type(struct hlsl_type *type) size_t i; vkd3d_free((void *)type->name); - if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { for (i = 0; i < type->e.record.field_count; ++i) { @@ -2447,6 +2669,11 @@ void hlsl_free_instr_list(struct list *list) hlsl_free_instr(node); } +void hlsl_block_cleanup(struct hlsl_block *block) +{ + hlsl_free_instr_list(&block->instrs); +} + static void free_ir_call(struct hlsl_ir_call *call) { vkd3d_free(call); @@ -2468,8 +2695,8 @@ static void free_ir_expr(struct hlsl_ir_expr *expr) static void free_ir_if(struct hlsl_ir_if *if_node) { - hlsl_free_instr_list(&if_node->then_instrs.instrs); - hlsl_free_instr_list(&if_node->else_instrs.instrs); + hlsl_block_cleanup(&if_node->then_block); + hlsl_block_cleanup(&if_node->else_block); hlsl_src_remove(&if_node->condition); vkd3d_free(if_node); } @@ -2487,7 +2714,7 @@ static void free_ir_load(struct hlsl_ir_load *load) static void free_ir_loop(struct 
hlsl_ir_loop *loop) { - hlsl_free_instr_list(&loop->body.instrs); + hlsl_block_cleanup(&loop->body); vkd3d_free(loop); } @@ -2497,7 +2724,11 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) hlsl_cleanup_deref(&load->resource); hlsl_src_remove(&load->coords); hlsl_src_remove(&load->lod); + hlsl_src_remove(&load->ddx); + hlsl_src_remove(&load->ddy); + hlsl_src_remove(&load->cmp); hlsl_src_remove(&load->texel_offset); + hlsl_src_remove(&load->sample_index); vkd3d_free(load); } @@ -2522,6 +2753,13 @@ static void free_ir_swizzle(struct hlsl_ir_swizzle *swizzle) vkd3d_free(swizzle); } +static void free_ir_index(struct hlsl_ir_index *index) +{ + hlsl_src_remove(&index->val); + hlsl_src_remove(&index->idx); + vkd3d_free(index); +} + void hlsl_free_instr(struct hlsl_ir_node *node) { assert(list_empty(&node->uses)); @@ -2544,6 +2782,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) free_ir_if(hlsl_ir_if(node)); break; + case HLSL_IR_INDEX: + free_ir_index(hlsl_ir_index(node)); + break; + case HLSL_IR_JUMP: free_ir_jump(hlsl_ir_jump(node)); break; @@ -2600,7 +2842,7 @@ static void free_function_decl(struct hlsl_ir_function_decl *decl) vkd3d_free((void *)decl->attrs); vkd3d_free(decl->parameters.vars); - hlsl_free_instr_list(&decl->body.instrs); + hlsl_block_cleanup(&decl->body); vkd3d_free(decl); } @@ -2826,11 +3068,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) static const char *const sampler_names[] = { - [HLSL_SAMPLER_DIM_GENERIC] = "sampler", - [HLSL_SAMPLER_DIM_1D] = "sampler1D", - [HLSL_SAMPLER_DIM_2D] = "sampler2D", - [HLSL_SAMPLER_DIM_3D] = "sampler3D", - [HLSL_SAMPLER_DIM_CUBE] = "samplerCUBE", + [HLSL_SAMPLER_DIM_GENERIC] = "sampler", + [HLSL_SAMPLER_DIM_COMPARISON] = "SamplerComparisonState", + [HLSL_SAMPLER_DIM_1D] = "sampler1D", + [HLSL_SAMPLER_DIM_2D] = "sampler2D", + [HLSL_SAMPLER_DIM_3D] = "sampler3D", + [HLSL_SAMPLER_DIM_CUBE] = "samplerCUBE", }; static const struct @@ -2844,8 +3087,8 @@ static void 
declare_predefined_types(struct hlsl_ctx *ctx) { {"dword", HLSL_CLASS_SCALAR, HLSL_TYPE_UINT, 1, 1}, {"float", HLSL_CLASS_SCALAR, HLSL_TYPE_FLOAT, 1, 1}, - {"VECTOR", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, - {"MATRIX", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, + {"vector", HLSL_CLASS_VECTOR, HLSL_TYPE_FLOAT, 4, 1}, + {"matrix", HLSL_CLASS_MATRIX, HLSL_TYPE_FLOAT, 4, 4}, {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, @@ -2993,16 +3236,16 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const char *source_name, rb_init(&ctx->functions, compare_function_rb); - list_init(&ctx->static_initializers); + hlsl_block_init(&ctx->static_initializers); list_init(&ctx->extern_vars); list_init(&ctx->buffers); if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, - hlsl_strdup(ctx, "$Globals"), NULL, ctx->location))) + hlsl_strdup(ctx, "$Globals"), NULL, &ctx->location))) return false; if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, - hlsl_strdup(ctx, "$Params"), NULL, ctx->location))) + hlsl_strdup(ctx, "$Params"), NULL, &ctx->location))) return false; ctx->cur_buffer = ctx->globals_buffer; diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index ccbf22a5801..bce48e94b24 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -21,10 +21,12 @@ #define __VKD3D_SHADER_HLSL_H #include "vkd3d_shader_private.h" -#include "wine/rbtree.h" +#include "rbtree.h" #include "d3dcommon.h" #include "d3dx9shader.h" -#include "sm4.h" + +enum vkd3d_sm4_register_type; +enum vkd3d_sm4_swizzle_type; /* The general IR structure is inspired by Mesa GLSL hir, even though the code * ends up being quite different in practice. 
Anyway, here comes the relevant @@ -102,18 +104,22 @@ enum hlsl_base_type enum hlsl_sampler_dim { - HLSL_SAMPLER_DIM_GENERIC, - HLSL_SAMPLER_DIM_1D, - HLSL_SAMPLER_DIM_2D, - HLSL_SAMPLER_DIM_3D, - HLSL_SAMPLER_DIM_CUBE, - HLSL_SAMPLER_DIM_LAST_SAMPLER = HLSL_SAMPLER_DIM_CUBE, - HLSL_SAMPLER_DIM_1DARRAY, - HLSL_SAMPLER_DIM_2DARRAY, - HLSL_SAMPLER_DIM_2DMS, - HLSL_SAMPLER_DIM_2DMSARRAY, - HLSL_SAMPLER_DIM_CUBEARRAY, - HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_CUBEARRAY, + HLSL_SAMPLER_DIM_GENERIC, + HLSL_SAMPLER_DIM_COMPARISON, + HLSL_SAMPLER_DIM_1D, + HLSL_SAMPLER_DIM_2D, + HLSL_SAMPLER_DIM_3D, + HLSL_SAMPLER_DIM_CUBE, + HLSL_SAMPLER_DIM_LAST_SAMPLER = HLSL_SAMPLER_DIM_CUBE, + HLSL_SAMPLER_DIM_1DARRAY, + HLSL_SAMPLER_DIM_2DARRAY, + HLSL_SAMPLER_DIM_2DMS, + HLSL_SAMPLER_DIM_2DMSARRAY, + HLSL_SAMPLER_DIM_CUBEARRAY, + HLSL_SAMPLER_DIM_LAST_TEXTURE = HLSL_SAMPLER_DIM_CUBEARRAY, + HLSL_SAMPLER_DIM_BUFFER, + HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, + HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, }; enum hlsl_regset @@ -134,16 +140,17 @@ struct hlsl_type /* Item entry in hlsl_scope->types. hlsl_type->name is used as key (if not NULL). */ struct rb_entry scope_entry; - enum hlsl_type_class type; + enum hlsl_type_class class; /* If type is <= HLSL_CLASS_LAST_NUMERIC, then base_type is <= HLSL_TYPE_LAST_SCALAR. * If type is HLSL_CLASS_OBJECT, then base_type is > HLSL_TYPE_LAST_SCALAR. * Otherwise, base_type is not used. */ enum hlsl_base_type base_type; /* If base_type is HLSL_TYPE_SAMPLER, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_SAMPLER. - * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim can have any value of the enum. - * If base_type is HLSL_TYPE_UAV, them sampler_dim must be one of HLSL_SAMPLER_DIM_1D, - * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, or HLSL_SAMPLER_DIM_2DARRAY. + * If base_type is HLSL_TYPE_TEXTURE, then sampler_dim is <= HLSL_SAMPLER_DIM_LAST_TEXTURE. 
+ * If base_type is HLSL_TYPE_UAV, then sampler_dim must be one of HLSL_SAMPLER_DIM_1D, + * HLSL_SAMPLER_DIM_2D, HLSL_SAMPLER_DIM_3D, HLSL_SAMPLER_DIM_1DARRAY, HLSL_SAMPLER_DIM_2DARRAY, + * HLSL_SAMPLER_DIM_BUFFER, or HLSL_SAMPLER_DIM_STRUCTURED_BUFFER. * Otherwise, sampler_dim is not used */ enum hlsl_sampler_dim sampler_dim; /* Name, in case the type is a named struct or a typedef. */ @@ -207,6 +214,16 @@ struct hlsl_semantic { const char *name; uint32_t index; + + /* If the variable or field that stores this hlsl_semantic has already reported that it is missing. */ + bool reported_missing; + /* In case the variable or field that stores this semantic has already reported to use a + * duplicated output semantic, this value stores the last reported index + 1. Otherwise it is 0. */ + uint32_t reported_duplicated_output_next_index; + /* In case the variable or field that stores this semantic has already reported to use a + * duplicated input semantic with incompatible values, this value stores the last reported + * index + 1. Otherwise it is 0. */ + uint32_t reported_duplicated_input_incompatible_next_index; }; /* A field within a struct type declaration, used in hlsl_type.e.fields. */ @@ -228,16 +245,21 @@ struct hlsl_struct_field size_t name_bytecode_offset; }; -/* Information of the register allocated for an instruction node or variable. +/* Information of the register(s) allocated for an instruction node or variable. * These values are initialized at the end of hlsl_emit_bytecode(), after the compilation passes, * just before writing the bytecode. - * For numeric registers, a writemask can be provided to indicate the reservation of only some of the - * 4 components. * The type of register (register class) is implied from its use, so it is not stored in this * struct. */ struct hlsl_reg { + /* Index of the first register allocated. */ uint32_t id; + /* Number of registers to be allocated. 
+ * Unlike the variable's type's regsize, it is not expressed in register components, but rather + * in whole registers, and may depend on which components are used within the shader. */ + uint32_t bind_count; + /* For numeric registers, a writemask can be provided to indicate the reservation of only some + * of the 4 components. */ unsigned int writemask; /* Whether the register has been allocated. */ bool allocated; @@ -254,6 +276,7 @@ enum hlsl_ir_node_type HLSL_IR_CONSTANT, HLSL_IR_EXPR, HLSL_IR_IF, + HLSL_IR_INDEX, HLSL_IR_LOAD, HLSL_IR_LOOP, HLSL_IR_JUMP, @@ -342,12 +365,17 @@ struct hlsl_attribute #define HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT 0 -/* Reservation of a specific register to a variable, field, or buffer, written in the HLSL source - * using the register(·) syntax */ +/* Reservation of a register and/or an offset for objects inside constant buffers, to be used as a + * starting point of their allocation. They are available through the register(·) and the + * packoffset(·) syntaxes, respectivelly. + * The costant buffer offset is measured register components. */ struct hlsl_reg_reservation { - char type; - unsigned int index; + char reg_type; + unsigned int reg_index; + + char offset_type; + unsigned int offset_index; }; struct hlsl_ir_var @@ -360,8 +388,7 @@ struct hlsl_ir_var struct hlsl_buffer *buffer; /* Bitfield for storage modifiers (type modifiers are stored in data_type->modifiers). */ unsigned int storage_modifiers; - /* Optional register to be used as a starting point for the variable allocation, specified - * by the user via the register(·) syntax. */ + /* Optional reservations of registers and/or offsets for variables within constant buffers. */ struct hlsl_reg_reservation reg_reservation; /* Item entry in hlsl_scope.vars. Specifically hlsl_ctx.globals.vars if the variable is global. */ @@ -384,6 +411,13 @@ struct hlsl_ir_var * and the buffer_offset instead. 
*/ struct hlsl_reg regs[HLSL_REGSET_LAST + 1]; + struct + { + bool used; + enum hlsl_sampler_dim sampler_dim; + struct vkd3d_shader_location first_sampler_dim_loc; + } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; + uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; @@ -446,8 +480,8 @@ struct hlsl_ir_if { struct hlsl_ir_node node; struct hlsl_src condition; - struct hlsl_block then_instrs; - struct hlsl_block else_instrs; + struct hlsl_block then_block; + struct hlsl_block else_block; }; struct hlsl_ir_loop @@ -485,6 +519,7 @@ enum hlsl_ir_expr_op HLSL_OP1_SIN, HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi] */ HLSL_OP1_SQRT, + HLSL_OP1_TRUNC, HLSL_OP2_ADD, HLSL_OP2_BIT_AND, @@ -540,6 +575,12 @@ struct hlsl_ir_swizzle DWORD swizzle; }; +struct hlsl_ir_index +{ + struct hlsl_ir_node node; + struct hlsl_src val, idx; +}; + /* Reference to a variable, or a part of it (e.g. a vector within a matrix within a struct). */ struct hlsl_deref { @@ -574,7 +615,11 @@ enum hlsl_resource_load_type { HLSL_RESOURCE_LOAD, HLSL_RESOURCE_SAMPLE, + HLSL_RESOURCE_SAMPLE_CMP, + HLSL_RESOURCE_SAMPLE_CMP_LZ, HLSL_RESOURCE_SAMPLE_LOD, + HLSL_RESOURCE_SAMPLE_LOD_BIAS, + HLSL_RESOURCE_SAMPLE_GRAD, HLSL_RESOURCE_GATHER_RED, HLSL_RESOURCE_GATHER_GREEN, HLSL_RESOURCE_GATHER_BLUE, @@ -586,7 +631,8 @@ struct hlsl_ir_resource_load struct hlsl_ir_node node; enum hlsl_resource_load_type load_type; struct hlsl_deref resource, sampler; - struct hlsl_src coords, lod, texel_offset; + struct hlsl_src coords, lod, ddx, ddy, cmp, sample_index, texel_offset; + enum hlsl_sampler_dim sampling_dim; }; struct hlsl_ir_resource_store @@ -607,13 +653,16 @@ struct hlsl_ir_store struct hlsl_ir_constant { struct hlsl_ir_node node; - union hlsl_constant_value + struct hlsl_constant_value { - uint32_t u; - int32_t i; - float f; - double d; - } value[4]; + union hlsl_constant_value_component + { + uint32_t u; + int32_t i; + float f; + double d; + } u[4]; + } value; /* Constant 
register of type 'c' where the constant value is stored for SM1. */ struct hlsl_reg reg; }; @@ -674,6 +723,9 @@ struct hlsl_buffer unsigned size, used_size; /* Register of type 'b' on which the buffer is allocated. */ struct hlsl_reg reg; + + bool manually_packed_elements; + bool automatically_packed_elements; }; struct hlsl_ctx @@ -744,9 +796,8 @@ struct hlsl_ctx struct hlsl_type *Void; } builtin_types; - /* List of the instruction nodes for initializing static variables; linked by the - * hlsl_ir_node.entry fields. */ - struct list static_initializers; + /* List of the instruction nodes for initializing static variables. */ + struct hlsl_block static_initializers; /* Dynamic array of constant values that appear in the shader, associated to the 'c' registers. * Only used for SM1 profiles. */ @@ -780,8 +831,9 @@ struct hlsl_resource_load_params { struct hlsl_type *format; enum hlsl_resource_load_type type; - struct hlsl_deref resource, sampler; - struct hlsl_ir_node *coords, *lod, *texel_offset; + struct hlsl_ir_node *resource, *sampler; + struct hlsl_ir_node *coords, *lod, *ddx, *ddy, *cmp, *sample_index, *texel_offset; + enum hlsl_sampler_dim sampling_dim; }; static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) @@ -850,6 +902,27 @@ static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node return CONTAINING_RECORD(node, struct hlsl_ir_swizzle, node); } +static inline struct hlsl_ir_index *hlsl_ir_index(const struct hlsl_ir_node *node) +{ + assert(node->type == HLSL_IR_INDEX); + return CONTAINING_RECORD(node, struct hlsl_ir_index, node); +} + +static inline void hlsl_block_init(struct hlsl_block *block) +{ + list_init(&block->instrs); +} + +static inline void hlsl_block_add_instr(struct hlsl_block *block, struct hlsl_ir_node *instr) +{ + list_add_tail(&block->instrs, &instr->entry); +} + +static inline void hlsl_block_add_block(struct hlsl_block *block, struct hlsl_block *add) +{ + list_move_tail(&block->instrs, 
&add->instrs); +} + static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node) { src->node = node; @@ -873,6 +946,15 @@ static inline void *hlsl_alloc(struct hlsl_ctx *ctx, size_t size) return ptr; } +static inline void *hlsl_calloc(struct hlsl_ctx *ctx, size_t count, size_t size) +{ + void *ptr = vkd3d_calloc(count, size); + + if (!ptr) + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return ptr; +} + static inline void *hlsl_realloc(struct hlsl_ctx *ctx, void *ptr, size_t size) { void *ret = vkd3d_realloc(ptr, size); @@ -948,6 +1030,8 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) switch (dim) { case HLSL_SAMPLER_DIM_1D: + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: return 1; case HLSL_SAMPLER_DIM_1DARRAY: case HLSL_SAMPLER_DIM_2D: @@ -974,11 +1058,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type); -struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, +struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false); void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var); +void hlsl_block_cleanup(struct hlsl_block *block); bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); @@ -986,6 +1071,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum 
vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); +bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain); bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); void hlsl_cleanup_deref(struct hlsl_deref *deref); @@ -1012,64 +1098,73 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type); struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size); struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2); -struct hlsl_ir_constant *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, - const struct hlsl_reg_reservation *reservation, struct vkd3d_shader_location loc); + const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, const struct vkd3d_shader_location *loc); -struct hlsl_ir_expr *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, - const struct vkd3d_shader_location *loc); -struct hlsl_ir_constant *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, +struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, const struct vkd3d_shader_location *loc); -struct hlsl_ir_expr *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node); +struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *type, + const struct hlsl_constant_value *value, const struct vkd3d_shader_location *loc); 
+struct hlsl_ir_node *hlsl_new_copy(struct hlsl_ctx *ctx, struct hlsl_ir_node *node); struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], struct hlsl_type *data_type, const struct vkd3d_shader_location *loc); -struct hlsl_ir_constant *hlsl_new_float_constant(struct hlsl_ctx *ctx, +struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, const struct vkd3d_shader_location *loc); struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc); -struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct vkd3d_shader_location loc); -struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n, - const struct vkd3d_shader_location *loc); -struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc); +struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, + struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, + enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc); void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - struct vkd3d_shader_location loc); + const struct vkd3d_shader_location *loc); struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); -struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx 
*ctx, struct hlsl_block *block, +struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); -struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); -struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, +struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); +struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc); -struct hlsl_ir_store *hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, +bool hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs); -struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc); -struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, +bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); +bool hlsl_index_is_resource_access(struct hlsl_ir_index *index); + +struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, + struct hlsl_block *block, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); -struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref 
*resource, +struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count); -struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, +struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, unsigned int sample_count); struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format); -struct hlsl_ir_constant *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, +struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, - struct vkd3d_shader_location loc); + const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, - const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers, + const struct vkd3d_shader_location *loc, const struct hlsl_semantic *semantic, unsigned int modifiers, const struct hlsl_reg_reservation *reg_reservation); void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, @@ -1101,6 +1196,9 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type); unsigned int hlsl_type_get_sm4_offset(const 
struct hlsl_type *type, unsigned int offset); bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); +const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type); +unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type); + unsigned int hlsl_combine_swizzles(unsigned int first, unsigned int second, unsigned int dim); unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second); unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask); @@ -1109,12 +1207,17 @@ unsigned int hlsl_swizzle_from_writemask(unsigned int writemask); struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *start, unsigned int *count); +bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index); bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset); unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); +bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); +bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), + struct hlsl_block *block, void *context); bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); @@ -1124,7 +1227,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl 
*entry_fun bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); + bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index adff1da04d8..e9ae3ccf3d3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -37,6 +37,7 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); %option bison-locations %option extra-type="struct hlsl_ctx *" %option never-interactive +%option nodefault %option noinput %option nounput %option noyywrap @@ -95,6 +96,7 @@ matrix {return KW_MATRIX; } namespace {return KW_NAMESPACE; } nointerpolation {return KW_NOINTERPOLATION; } out {return KW_OUT; } +packoffset {return KW_PACKOFFSET; } pass {return KW_PASS; } PixelShader {return KW_PIXELSHADER; } precise {return KW_PRECISE; } @@ -102,6 +104,8 @@ RasterizerState {return KW_RASTERIZERSTATE; } RenderTargetView {return KW_RENDERTARGETVIEW; } return {return KW_RETURN; } register {return KW_REGISTER; } +RWBuffer {return KW_RWBUFFER; } +RWStructuredBuffer {return KW_RWSTRUCTUREDBUFFER; } RWTexture1D {return KW_RWTEXTURE1D; } RWTexture2D {return KW_RWTEXTURE2D; } RWTexture3D {return KW_RWTEXTURE3D; } @@ -265,6 +269,10 @@ row_major {return KW_ROW_MAJOR; } return STRING; } {WS}+ {} +{ANY} { + FIXME("Malformed preprocessor line directive?\n"); + BEGIN(INITIAL); + } {NEWLINE} { FIXME("Malformed preprocessor line directive?\n"); BEGIN(INITIAL); diff --git 
a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index fd1eaf6ec95..0e07fe578e1 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -85,8 +85,8 @@ struct parse_function struct parse_if_body { - struct list *then_instrs; - struct list *else_instrs; + struct list *then_block; + struct list *else_block; }; enum parse_assign_op @@ -164,7 +164,7 @@ static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct src_comp_type = hlsl_type_get_component_type(ctx, src, k); dst_comp_type = hlsl_type_get_component_type(ctx, dst, k); - if ((src_comp_type->type != HLSL_CLASS_SCALAR || dst_comp_type->type != HLSL_CLASS_SCALAR) + if ((src_comp_type->class != HLSL_CLASS_SCALAR || dst_comp_type->class != HLSL_CLASS_SCALAR) && !hlsl_types_are_equal(src_comp_type, dst_comp_type)) return false; } @@ -196,9 +196,9 @@ static bool type_contains_only_numerics(struct hlsl_type *type) { unsigned int i; - if (type->type == HLSL_CLASS_ARRAY) + if (type->class == HLSL_CLASS_ARRAY) return type_contains_only_numerics(type->e.array.type); - if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { for (i = 0; i < type->e.record.field_count; ++i) { @@ -207,23 +207,23 @@ static bool type_contains_only_numerics(struct hlsl_type *type) } return true; } - return type->type <= HLSL_CLASS_LAST_NUMERIC; + return type->class <= HLSL_CLASS_LAST_NUMERIC; } static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) { - if (src->type <= HLSL_CLASS_LAST_NUMERIC && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) + if (src->class <= HLSL_CLASS_LAST_NUMERIC && src->dimx == 1 && src->dimy == 1 && type_contains_only_numerics(dst)) return true; - if (src->type == HLSL_CLASS_MATRIX && dst->type == HLSL_CLASS_MATRIX + if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX && src->dimx >= dst->dimx && src->dimy >= dst->dimy) 
return true; - if ((src->type == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) + if ((src->class == HLSL_CLASS_MATRIX && src->dimx > 1 && src->dimy > 1) && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) return false; - if ((dst->type == HLSL_CLASS_MATRIX && dst->dimy > 1) + if ((dst->class == HLSL_CLASS_MATRIX && dst->dimy > 1) && hlsl_type_component_count(src) != hlsl_type_component_count(dst)) return false; @@ -232,10 +232,10 @@ static bool explicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) { - if ((src->type <= HLSL_CLASS_LAST_NUMERIC) != (dst->type <= HLSL_CLASS_LAST_NUMERIC)) + if ((src->class <= HLSL_CLASS_LAST_NUMERIC) != (dst->class <= HLSL_CLASS_LAST_NUMERIC)) return false; - if (src->type <= HLSL_CLASS_LAST_NUMERIC) + if (src->class <= HLSL_CLASS_LAST_NUMERIC) { /* Scalar vars can be converted to any other numeric data type */ if (src->dimx == 1 && src->dimy == 1) @@ -244,21 +244,21 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ if (dst->dimx == 1 && dst->dimy == 1) return true; - if (src->type == HLSL_CLASS_MATRIX || dst->type == HLSL_CLASS_MATRIX) + if (src->class == HLSL_CLASS_MATRIX || dst->class == HLSL_CLASS_MATRIX) { - if (src->type == HLSL_CLASS_MATRIX && dst->type == HLSL_CLASS_MATRIX) + if (src->class == HLSL_CLASS_MATRIX && dst->class == HLSL_CLASS_MATRIX) return src->dimx >= dst->dimx && src->dimy >= dst->dimy; /* Matrix-vector conversion is apparently allowed if they have * the same components count, or if the matrix is 1xN or Nx1 * and we are reducing the component count */ - if (src->type == HLSL_CLASS_VECTOR || dst->type == HLSL_CLASS_VECTOR) + if (src->class == HLSL_CLASS_VECTOR || dst->class == HLSL_CLASS_VECTOR) { if (hlsl_type_component_count(src) == hlsl_type_component_count(dst)) return true; - if ((src->type == HLSL_CLASS_VECTOR || src->dimx == 
1 || src->dimy == 1) && - (dst->type == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) + if ((src->class == HLSL_CLASS_VECTOR || src->dimx == 1 || src->dimy == 1) && + (dst->class == HLSL_CLASS_VECTOR || dst->dimx == 1 || dst->dimy == 1)) return hlsl_type_component_count(src) >= hlsl_type_component_count(dst); } @@ -273,19 +273,19 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ return hlsl_types_are_componentwise_equal(ctx, src, dst); } -static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc); static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { struct hlsl_type *src_type = node->data_type; - struct hlsl_ir_expr *cast; + struct hlsl_ir_node *cast; if (hlsl_types_are_equal(src_type, dst_type)) return node; - if (src_type->type > HLSL_CLASS_VECTOR || dst_type->type > HLSL_CLASS_VECTOR) + if (src_type->class > HLSL_CLASS_VECTOR || dst_type->class > HLSL_CLASS_VECTOR) { unsigned int src_comp_count = hlsl_type_component_count(src_type); unsigned int dst_comp_count = hlsl_type_component_count(dst_type); @@ -295,9 +295,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var; unsigned int dst_idx; - broadcast = src_type->type <= HLSL_CLASS_LAST_NUMERIC && src_type->dimx == 1 && src_type->dimy == 1; + broadcast = src_type->class <= HLSL_CLASS_LAST_NUMERIC && src_type->dimx == 1 && src_type->dimy == 1; matrix_cast = !broadcast && dst_comp_count != src_comp_count - && src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX; + && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == 
HLSL_CLASS_MATRIX; assert(src_comp_count >= dst_comp_count || broadcast); if (matrix_cast) { @@ -311,8 +311,8 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx) { + struct hlsl_ir_node *component_load; struct hlsl_type *dst_comp_type; - struct hlsl_ir_store *store; struct hlsl_block block; unsigned int src_idx; @@ -333,19 +333,19 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); - if (!(load = add_load_component(ctx, instrs, node, src_idx, loc))) + if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) return NULL; - if (!(cast = hlsl_new_cast(ctx, &load->node, dst_comp_type, loc))) + if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) return NULL; - list_add_tail(instrs, &cast->node.entry); + list_add_tail(instrs, &cast->entry); - if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, &cast->node))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, dst_idx, cast)) return NULL; list_move_tail(instrs, &block.instrs); } - if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; list_add_tail(instrs, &load->node.entry); @@ -355,8 +355,8 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, { if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc))) return NULL; - list_add_tail(instrs, &cast->node.entry); - return &cast->node; + list_add_tail(instrs, &cast->entry); + return cast; } } @@ -384,19 +384,20 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct if (dst_type->dimx * dst_type->dimy < src_type->dimx * src_type->dimy) hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION, "Implicit truncation of %s type.", - src_type->type == HLSL_CLASS_VECTOR ? 
"vector" : "matrix"); + src_type->class == HLSL_CLASS_VECTOR ? "vector" : "matrix"); return add_cast(ctx, instrs, node, dst_type, loc); } -static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, const struct vkd3d_shader_location loc) +static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, + const struct vkd3d_shader_location *loc) { if (modifiers & mod) { struct vkd3d_string_buffer *string; if ((string = hlsl_modifiers_to_string(ctx, mod))) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Modifier '%s' was already specified.", string->buffer); hlsl_release_string_buffer(ctx, string); return modifiers; @@ -406,26 +407,27 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, con static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) { - struct hlsl_ir_node *condition, *not; - struct hlsl_ir_jump *jump; - struct hlsl_ir_if *iff; + struct hlsl_ir_node *condition, *not, *iff, *jump; + struct hlsl_block then_block; /* E.g. "for (i = 0; ; ++i)". 
*/ if (list_empty(cond_list)) return true; condition = node_from_list(cond_list); - if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, condition->loc))) + if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) return false; list_add_tail(cond_list, &not->entry); - if (!(iff = hlsl_new_if(ctx, not, condition->loc))) + hlsl_block_init(&then_block); + + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) return false; - list_add_tail(cond_list, &iff->node.entry); + hlsl_block_add_instr(&then_block, jump); - if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, condition->loc))) + if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) return false; - list_add_head(&iff->then_instrs.instrs, &jump->node.entry); + list_add_tail(cond_list, &iff->entry); return true; } @@ -436,46 +438,87 @@ enum loop_type LOOP_DO_WHILE }; -static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, struct list *init, struct list *cond, - struct list *iter, struct list *body, struct vkd3d_shader_location loc) +static bool attribute_list_has_duplicates(const struct parse_attribute_list *attrs) { - struct list *list = NULL; - struct hlsl_ir_loop *loop = NULL; - struct hlsl_ir_if *cond_jump = NULL; + unsigned int i, j; - if (!(list = make_empty_list(ctx))) - goto oom; + for (i = 0; i < attrs->count; ++i) + { + for (j = i + 1; j < attrs->count; ++j) + { + if (!strcmp(attrs->attrs[i]->name, attrs->attrs[j]->name)) + return true; + } + } + + return false; +} + +static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct list *init, struct list *cond, + struct list *iter, struct list *body, const struct vkd3d_shader_location *loc) +{ + struct hlsl_block body_block; + struct hlsl_ir_node *loop; + unsigned int i; + + if (attribute_list_has_duplicates(attributes)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found
duplicate attribute."); - if (init) - list_move_head(list, init); + /* Ignore unroll(0) attribute, and any invalid attribute. */ + for (i = 0; i < attributes->count; ++i) + { + const struct hlsl_attribute *attr = attributes->attrs[i]; + if (!strcmp(attr->name, "unroll")) + { + if (attr->args_count) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); + } + else + { + hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented.\n"); + } + } + else if (!strcmp(attr->name, "loop") + || !strcmp(attr->name, "fastopt") + || !strcmp(attr->name, "allow_uav_condition")) + { + hlsl_fixme(ctx, loc, "Unhandled attribute %s.", attr->name); + } + else + { + hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unrecognized attribute %s.", attr->name); + } + } - if (!(loop = hlsl_new_loop(ctx, loc))) + if (!init && !(init = make_empty_list(ctx))) goto oom; - list_add_tail(list, &loop->node.entry); if (!append_conditional_break(ctx, cond)) goto oom; + hlsl_block_init(&body_block); + if (type != LOOP_DO_WHILE) - list_move_tail(&loop->body.instrs, cond); + list_move_tail(&body_block.instrs, cond); - list_move_tail(&loop->body.instrs, body); + list_move_tail(&body_block.instrs, body); if (iter) - list_move_tail(&loop->body.instrs, iter); + list_move_tail(&body_block.instrs, iter); if (type == LOOP_DO_WHILE) - list_move_tail(&loop->body.instrs, cond); + list_move_tail(&body_block.instrs, cond); + + if (!(loop = hlsl_new_loop(ctx, &body_block, loc))) + goto oom; + list_add_tail(init, &loop->entry); - vkd3d_free(init); vkd3d_free(cond); vkd3d_free(body); - return list; + return init; oom: - vkd3d_free(loop); - vkd3d_free(cond_jump); - vkd3d_free(list); destroy_instr_list(init); destroy_instr_list(cond); destroy_instr_list(iter); @@ -500,14 +543,14 @@ static void free_parse_initializer(struct parse_initializer *initializer) vkd3d_free(initializer->args); } -static struct 
hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_node *value, const char *swizzle, +static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_node *value, const char *swizzle, struct vkd3d_shader_location *loc) { unsigned int len = strlen(swizzle), component = 0; unsigned int i, set, swiz = 0; bool valid; - if (value->data_type->type == HLSL_CLASS_MATRIX) + if (value->data_type->class == HLSL_CLASS_MATRIX) { /* Matrix swizzle */ bool m_swizzle; @@ -582,224 +625,102 @@ static struct hlsl_ir_swizzle *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_ return NULL; } -static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_node *return_value, struct vkd3d_shader_location loc) +static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) { struct hlsl_type *return_type = ctx->cur_function->return_type; - struct hlsl_ir_jump *jump; + struct hlsl_ir_node *jump; if (ctx->cur_function->return_var) { if (return_value) { - struct hlsl_ir_store *store; + struct hlsl_ir_node *store; - if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, &loc))) - return NULL; + if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, loc))) + return false; if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) - return NULL; - list_add_after(&return_value->entry, &store->node.entry); + return false; + list_add_after(&return_value->entry, &store->entry); } else { - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Non-void functions must return a value."); - return NULL; + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Non-void functions must return a value."); + return false; } } else { if (return_value) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); + 
hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); } if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) - return NULL; - list_add_tail(instrs, &jump->node.entry); - - return jump; -} - -static struct hlsl_ir_load *add_load_index(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) -{ - const struct hlsl_deref *src; - struct hlsl_ir_load *load; - - if (var_instr->type == HLSL_IR_LOAD) - { - src = &hlsl_ir_load(var_instr)->src; - } - else - { - struct hlsl_ir_store *store; - struct hlsl_ir_var *var; - - if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) - return NULL; - - if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) - return NULL; - list_add_tail(instrs, &store->node.entry); - - src = &store->lhs; - } - - if (!(load = hlsl_new_load_index(ctx, src, idx, loc))) - return NULL; - list_add_tail(instrs, &load->node.entry); + return false; + list_add_tail(instrs, &jump->entry); - return load; + return true; } -static struct hlsl_ir_load *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) { - const struct hlsl_deref *src; - struct hlsl_ir_load *load; + struct hlsl_ir_node *load, *store; struct hlsl_block block; + struct hlsl_ir_var *var; + struct hlsl_deref src; - if (var_instr->type == HLSL_IR_LOAD) - { - src = &hlsl_ir_load(var_instr)->src; - } - else - { - struct hlsl_ir_store *store; - struct hlsl_ir_var *var; - - if (!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) - return NULL; - - if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) - return NULL; - list_add_tail(instrs, &store->node.entry); + if 
(!(var = hlsl_new_synthetic_var(ctx, "deref", var_instr->data_type, &var_instr->loc))) + return NULL; - src = &store->lhs; - } + if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) + return NULL; + list_add_tail(instrs, &store->entry); - if (!(load = hlsl_new_load_component(ctx, &block, src, comp, loc))) + hlsl_init_simple_deref_from_var(&src, var); + if (!(load = hlsl_new_load_component(ctx, &block, &src, comp, loc))) return NULL; list_move_tail(instrs, &block.instrs); return load; } -static bool add_record_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, - unsigned int idx, const struct vkd3d_shader_location loc) +static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *record, + unsigned int idx, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *c; + struct hlsl_ir_node *index, *c; assert(idx < record->data_type->e.record.field_count); - if (!(c = hlsl_new_uint_constant(ctx, idx, &loc))) - return false; - list_add_tail(instrs, &c->node.entry); - - return !!add_load_index(ctx, instrs, record, &c->node, &loc); -} - -static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc); - -static bool add_matrix_index(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_node *matrix, struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) -{ - struct hlsl_type *mat_type = matrix->data_type, *ret_type; - struct hlsl_deref var_deref; - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; - unsigned int i; - - if (hlsl_type_is_row_major(mat_type)) - return add_load_index(ctx, instrs, matrix, index, loc); - - ret_type = hlsl_get_vector_type(ctx, mat_type->base_type, mat_type->dimx); - - if (!(var = hlsl_new_synthetic_var(ctx, "index", ret_type, loc))) + if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) return 
false; - hlsl_init_simple_deref_from_var(&var_deref, var); - - for (i = 0; i < mat_type->dimx; ++i) - { - struct hlsl_ir_load *column, *value; - struct hlsl_ir_store *store; - struct hlsl_ir_constant *c; - struct hlsl_block block; - - if (!(c = hlsl_new_uint_constant(ctx, i, loc))) - return false; - list_add_tail(instrs, &c->node.entry); - - if (!(column = add_load_index(ctx, instrs, matrix, &c->node, loc))) - return false; - - if (!(value = add_load_index(ctx, instrs, &column->node, index, loc))) - return false; + list_add_tail(instrs, &c->entry); - if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, i, &value->node))) - return false; - list_move_tail(instrs, &block.instrs); - } - - if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(index = hlsl_new_index(ctx, record, c, loc))) return false; - list_add_tail(instrs, &load->node.entry); + list_add_tail(instrs, &index->entry); return true; } -static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct list *instrs, - struct hlsl_ir_node *index, unsigned int dim_count, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_load *coords_load; - struct hlsl_deref coords_deref; - struct hlsl_ir_constant *zero; - struct hlsl_ir_store *store; - struct hlsl_ir_var *coords; - - if (!(coords = hlsl_new_synthetic_var(ctx, "coords", - hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) - return NULL; - - hlsl_init_simple_deref_from_var(&coords_deref, coords); - if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, index, (1u << dim_count) - 1, loc))) - return NULL; - list_add_tail(instrs, &store->node.entry); - - if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) - return NULL; - list_add_tail(instrs, &zero->node.entry); - - if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, &zero->node, 1u << dim_count, loc))) - return NULL; - list_add_tail(instrs, &store->node.entry); - - if (!(coords_load = hlsl_new_var_load(ctx, coords, *loc))) - return NULL; - 
list_add_tail(instrs, &coords_load->node.entry); - - return &coords_load->node; -} +static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc); -static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, +static bool add_array_access(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *array, struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc) { const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; - struct hlsl_ir_expr *cast; + struct hlsl_ir_node *return_index, *cast; - if (expr_type->type == HLSL_CLASS_OBJECT + if (expr_type->class == HLSL_CLASS_OBJECT && (expr_type->base_type == HLSL_TYPE_TEXTURE || expr_type->base_type == HLSL_TYPE_UAV) && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; unsigned int dim_count = hlsl_sampler_dim_count(expr_type->sampler_dim); - /* Only HLSL_IR_LOAD can return an object. 
*/ - struct hlsl_ir_load *object_load = hlsl_ir_load(array); - struct hlsl_ir_resource_load *resource_load; - if (index_type->type > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) + if (index_type->class > HLSL_CLASS_VECTOR || index_type->dimx != dim_count) { struct vkd3d_string_buffer *string; @@ -814,20 +735,14 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count), &index->loc))) return false; - if (!(index = add_zero_mipmap_level(ctx, instrs, index, dim_count, loc))) + if (!(return_index = hlsl_new_index(ctx, array, index, loc))) return false; + list_add_tail(instrs, &return_index->entry); - load_params.format = expr_type->e.resource_format; - load_params.resource = object_load->src; - load_params.coords = index; - - if (!(resource_load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &resource_load->node.entry); return true; } - if (index_type->type != HLSL_CLASS_SCALAR) + if (index_type->class != HLSL_CLASS_SCALAR) { hlsl_error(ctx, &index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Array index is not scalar."); return false; @@ -835,23 +750,21 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls if (!(cast = hlsl_new_cast(ctx, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &index->loc))) return false; - list_add_tail(instrs, &cast->node.entry); - index = &cast->node; + list_add_tail(instrs, &cast->entry); + index = cast; - if (expr_type->type == HLSL_CLASS_MATRIX) - return add_matrix_index(ctx, instrs, array, index, loc); - - if (expr_type->type != HLSL_CLASS_ARRAY && expr_type->type != HLSL_CLASS_VECTOR) + if (expr_type->class != HLSL_CLASS_ARRAY && expr_type->class != HLSL_CLASS_VECTOR && expr_type->class != HLSL_CLASS_MATRIX) { - if (expr_type->type == HLSL_CLASS_SCALAR) + if (expr_type->class == HLSL_CLASS_SCALAR) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, "Scalar expressions 
cannot be array-indexed."); else hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_INDEX, "Expression cannot be array-indexed."); return false; } - if (!add_load_index(ctx, instrs, array, index, loc)) + if (!(return_index = hlsl_new_index(ctx, array, index, loc))) return false; + list_add_tail(instrs, &return_index->entry); return true; } @@ -877,12 +790,12 @@ static struct hlsl_type *apply_type_modifiers(struct hlsl_ctx *ctx, struct hlsl_ if (!(*modifiers & HLSL_MODIFIERS_MAJORITY_MASK) && !(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK) - && type->type == HLSL_CLASS_MATRIX) + && type->class == HLSL_CLASS_MATRIX) { if (!(default_majority = ctx->matrix_majority) && force_majority) default_majority = HLSL_MODIFIER_COLUMN_MAJOR; } - else if (type->type != HLSL_CLASS_MATRIX && (*modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) + else if (type->class != HLSL_CLASS_MATRIX && (*modifiers & HLSL_MODIFIERS_MAJORITY_MASK)) { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "'row_major' and 'column_major' modifiers are only allowed for matrices."); @@ -923,7 +836,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, struct parse_variable_def *v, *v_next; size_t i = 0; - if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); memset(fields, 0, sizeof(*fields)); @@ -939,7 +852,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, field->type = type; - if (shader_is_sm_5_1(ctx) && type->type == HLSL_CLASS_OBJECT) + if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) { for (k = 0; k < v->arrays.count; ++k) unbounded_res_array |= (v->arrays.sizes[k] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); @@ -983,6 +896,9 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Illegal initializer on a struct field."); 
free_parse_initializer(&v->initializer); } + if (v->reg_reservation.offset_type) + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is not allowed inside struct definitions."); vkd3d_free(v); } vkd3d_free(defs); @@ -1052,18 +968,23 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, } static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, - struct parse_parameter *param, const struct vkd3d_shader_location loc) + struct parse_parameter *param, const struct vkd3d_shader_location *loc) { struct hlsl_ir_var *var; - if (param->type->type == HLSL_CLASS_MATRIX) + if (param->type->class == HLSL_CLASS_MATRIX) assert(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); if ((param->modifiers & HLSL_STORAGE_OUT) && (param->modifiers & HLSL_STORAGE_UNIFORM)) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, "Parameter '%s' is declared as both \"out\" and \"uniform\".", param->name); - if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, &param->semantic, param->modifiers, &param->reg_reservation))) + if (param->reg_reservation.offset_type) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is not allowed on function parameters."); + + if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, &param->semantic, param->modifiers, + &param->reg_reservation))) return false; var->is_param = 1; @@ -1084,12 +1005,61 @@ static struct hlsl_reg_reservation parse_reg_reservation(const char *reg_string) { struct hlsl_reg_reservation reservation = {0}; - if (!sscanf(reg_string + 1, "%u", &reservation.index)) + if (!sscanf(reg_string + 1, "%u", &reservation.reg_index)) { FIXME("Unsupported register reservation syntax.\n"); return reservation; } - reservation.type = reg_string[0]; + reservation.reg_type = ascii_tolower(reg_string[0]); + return reservation; +} + +static struct
hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const char *reg_string, + const char *swizzle, const struct vkd3d_shader_location *loc) +{ + struct hlsl_reg_reservation reservation = {0}; + char *endptr; + + if (ctx->profile->major_version < 4) + return reservation; + + reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); + if (*endptr) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid packoffset() syntax."); + return reservation; + } + + reservation.offset_type = ascii_tolower(reg_string[0]); + if (reservation.offset_type != 'c') + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Only 'c' registers are allowed in packoffset()."); + return reservation; + } + + reservation.offset_index *= 4; + + if (swizzle) + { + if (strlen(swizzle) != 1) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid packoffset() component \"%s\".", swizzle); + + if (swizzle[0] == 'x' || swizzle[0] == 'r') + reservation.offset_index += 0; + else if (swizzle[0] == 'y' || swizzle[0] == 'g') + reservation.offset_index += 1; + else if (swizzle[0] == 'z' || swizzle[0] == 'b') + reservation.offset_index += 2; + else if (swizzle[0] == 'w' || swizzle[0] == 'a') + reservation.offset_index += 3; + else + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid packoffset() component \"%s\".", swizzle); + } + return reservation; } @@ -1122,53 +1092,37 @@ static struct list *make_list(struct hlsl_ctx *ctx, struct hlsl_ir_node *node) return list; } -static unsigned int evaluate_static_expression(struct hlsl_ir_node *node) +static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct vkd3d_shader_location *loc) { - if (node->data_type->type != HLSL_CLASS_SCALAR) + struct hlsl_ir_constant *constant; + struct hlsl_ir_node *node; + unsigned int ret = 0; + bool progress; + + if (!add_implicit_conversion(ctx, &block->instrs, 
node_from_list(&block->instrs), + hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) return 0; - switch (node->type) + do { - case HLSL_IR_CONSTANT: - { - struct hlsl_ir_constant *constant = hlsl_ir_constant(node); - const union hlsl_constant_value *value = &constant->value[0]; - - switch (constant->node.data_type->base_type) - { - case HLSL_TYPE_UINT: - return value->u; - case HLSL_TYPE_INT: - return value->i; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return value->f; - case HLSL_TYPE_DOUBLE: - return value->d; - case HLSL_TYPE_BOOL: - return !!value->u; - default: - vkd3d_unreachable(); - } - } - - case HLSL_IR_EXPR: - case HLSL_IR_LOAD: - case HLSL_IR_RESOURCE_LOAD: - case HLSL_IR_SWIZZLE: - FIXME("Unhandled type %s.\n", hlsl_node_type_to_string(node->type)); - return 0; + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); + progress |= hlsl_copy_propagation_execute(ctx, block); + } while (progress); - case HLSL_IR_CALL: - case HLSL_IR_IF: - case HLSL_IR_JUMP: - case HLSL_IR_LOOP: - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_STORE: - vkd3d_unreachable(); + node = node_from_list(&block->instrs); + if (node->type == HLSL_IR_CONSTANT) + { + constant = hlsl_ir_constant(node); + ret = constant->value.u[0].u; + } + else + { + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Failed to evaluate constant expression %d.", node->type); } - vkd3d_unreachable(); + return ret; } static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) @@ -1180,20 +1134,20 @@ static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t if ((t1->dimx == 1 && t1->dimy == 1) || (t2->dimx == 1 && t2->dimy == 1)) return true; - if (t1->type == HLSL_CLASS_VECTOR && t2->type == HLSL_CLASS_VECTOR) + if (t1->class == HLSL_CLASS_VECTOR && t2->class == HLSL_CLASS_VECTOR) return true; - if (t1->type == HLSL_CLASS_MATRIX || t2->type == HLSL_CLASS_MATRIX) + if (t1->class == HLSL_CLASS_MATRIX || t2->class == 
HLSL_CLASS_MATRIX) { /* Matrix-vector conversion is apparently allowed if either they have the same components count or the matrix is nx1 or 1xn */ - if (t1->type == HLSL_CLASS_VECTOR || t2->type == HLSL_CLASS_VECTOR) + if (t1->class == HLSL_CLASS_VECTOR || t2->class == HLSL_CLASS_VECTOR) { if (hlsl_type_component_count(t1) == hlsl_type_component_count(t2)) return true; - return (t1->type == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) - || (t2->type == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); + return (t1->class == HLSL_CLASS_MATRIX && (t1->dimx == 1 || t1->dimy == 1)) + || (t2->class == HLSL_CLASS_MATRIX && (t2->dimx == 1 || t2->dimy == 1)); } /* Both matrices */ @@ -1226,7 +1180,7 @@ static enum hlsl_base_type expr_common_base_type(enum hlsl_base_type t1, enum hl static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct hlsl_type *t2, const struct vkd3d_shader_location *loc, enum hlsl_type_class *type, unsigned int *dimx, unsigned int *dimy) { - if (t1->type > HLSL_CLASS_LAST_NUMERIC) + if (t1->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string; @@ -1237,7 +1191,7 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct return false; } - if (t2->type > HLSL_CLASS_LAST_NUMERIC) + if (t2->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string; @@ -1264,17 +1218,17 @@ static bool expr_common_shape(struct hlsl_ctx *ctx, struct hlsl_type *t1, struct if (t1->dimx == 1 && t1->dimy == 1) { - *type = t2->type; + *type = t2->class; *dimx = t2->dimx; *dimy = t2->dimy; } else if (t2->dimx == 1 && t2->dimy == 1) { - *type = t1->type; + *type = t1->class; *dimx = t1->dimx; *dimy = t1->dimy; } - else if (t1->type == HLSL_CLASS_MATRIX && t2->type == HLSL_CLASS_MATRIX) + else if (t1->class == HLSL_CLASS_MATRIX && t2->class == HLSL_CLASS_MATRIX) { *type = HLSL_CLASS_MATRIX; *dimx = min(t1->dimx, t2->dimx); @@ -1284,13 +1238,13 @@ static bool expr_common_shape(struct 
hlsl_ctx *ctx, struct hlsl_type *t1, struct { if (t1->dimx * t1->dimy <= t2->dimx * t2->dimy) { - *type = t1->type; + *type = t1->class; *dimx = t1->dimx; *dimy = t1->dimy; } else { - *type = t2->type; + *type = t2->class; *dimx = t2->dimx; *dimy = t2->dimy; } @@ -1306,55 +1260,50 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *expr; unsigned int i; - if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) { - struct hlsl_type *vector_type; + struct hlsl_type *scalar_type; + struct hlsl_ir_load *var_load; struct hlsl_deref var_deref; - struct hlsl_ir_load *load; + struct hlsl_ir_node *load; struct hlsl_ir_var *var; - vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + scalar_type = hlsl_get_scalar_type(ctx, type->base_type); if (!(var = hlsl_new_synthetic_var(ctx, "split_op", type, loc))) return NULL; hlsl_init_simple_deref_from_var(&var_deref, var); - for (i = 0; i < hlsl_type_major_size(type); ++i) + for (i = 0; i < type->dimy * type->dimx; ++i) { - struct hlsl_ir_node *value, *vector_operands[HLSL_MAX_OPERANDS] = { NULL }; - struct hlsl_ir_store *store; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *value, *cell_operands[HLSL_MAX_OPERANDS] = { NULL }; + struct hlsl_block block; unsigned int j; - if (!(c = hlsl_new_uint_constant(ctx, i, loc))) - return NULL; - list_add_tail(instrs, &c->node.entry); - for (j = 0; j < HLSL_MAX_OPERANDS; j++) { if (operands[j]) { - struct hlsl_ir_load *load; - - if (!(load = add_load_index(ctx, instrs, operands[j], &c->node, loc))) + if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) return NULL; - vector_operands[j] = &load->node; + + cell_operands[j] = load; } } - if (!(value = add_expr(ctx, instrs, op, vector_operands, vector_type, loc))) + if (!(value = add_expr(ctx, instrs, op, cell_operands, scalar_type, loc))) return NULL; - if (!(store = hlsl_new_store_index(ctx, &var_deref, &c->node, value, 
0, loc))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, i, value)) return NULL; - list_add_tail(instrs, &store->node.entry); + list_move_tail(instrs, &block.instrs); } - if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return NULL; - list_add_tail(instrs, &load->node.entry); + list_add_tail(instrs, &var_load->node.entry); - return &load->node; + return &var_load->node; } if (!(expr = hlsl_new_expr(ctx, op, operands, type, loc))) @@ -1407,7 +1356,7 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; struct hlsl_type *bool_type; - bool_type = hlsl_get_numeric_type(ctx, arg->data_type->type, HLSL_TYPE_BOOL, + bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, arg->data_type->dimx, arg->data_type->dimy); if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) @@ -1416,20 +1365,27 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct return add_expr(ctx, instrs, op, args, bool_type, loc); } -static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, - enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, - const struct vkd3d_shader_location *loc) +static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *arg1, + const struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { - struct hlsl_type *common_type; enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); enum hlsl_type_class type; unsigned int dimx, dimy; - struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; if (!expr_common_shape(ctx, arg1->data_type, arg2->data_type, loc, &type, &dimx, &dimy)) return NULL; - common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); + return hlsl_get_numeric_type(ctx, type, base, dimx, 
dimy); +} + +static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, + enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *common_type; + + common_type = get_common_numeric_type(ctx, arg1, arg2, loc); if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) return NULL; @@ -1441,13 +1397,13 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str } static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, struct vkd3d_shader_location loc) + enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); list_move_tail(list1, list2); vkd3d_free(list2); - add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, &loc); + add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, loc); return list1; } @@ -1499,13 +1455,13 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str } static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location loc) + enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); list_move_tail(list1, list2); vkd3d_free(list2); - add_binary_comparison_expr(ctx, list1, op, arg1, arg2, &loc); + add_binary_comparison_expr(ctx, list1, op, arg1, arg2, loc); return list1; } @@ -1596,7 +1552,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis enum hlsl_ir_expr_op op; unsigned dim; - if (arg1->data_type->type == HLSL_CLASS_MATRIX) + if (arg1->data_type->class == HLSL_CLASS_MATRIX) { struct 
vkd3d_string_buffer *string; @@ -1607,7 +1563,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis return NULL; } - if (arg2->data_type->type == HLSL_CLASS_MATRIX) + if (arg2->data_type->class == HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string; @@ -1618,9 +1574,9 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis return NULL; } - if (arg1->data_type->type == HLSL_CLASS_SCALAR) + if (arg1->data_type->class == HLSL_CLASS_SCALAR) dim = arg2->data_type->dimx; - else if (arg2->data_type->type == HLSL_CLASS_SCALAR) + else if (arg2->data_type->class == HLSL_CLASS_SCALAR) dim = arg1->data_type->dimx; else dim = min(arg1->data_type->dimx, arg2->data_type->dimx); @@ -1702,7 +1658,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) { struct hlsl_type *lhs_type = lhs->data_type; - struct hlsl_ir_expr *copy; + struct hlsl_ir_node *copy; unsigned int writemask = 0; if (assign_op == ASSIGN_OP_SUB) @@ -1720,13 +1676,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in return NULL; } - if (lhs_type->type <= HLSL_CLASS_LAST_NUMERIC) + if (lhs_type->class <= HLSL_CLASS_LAST_NUMERIC) writemask = (1 << lhs_type->dimx) - 1; if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) return NULL; - while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_RESOURCE_LOAD) + while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_INDEX) { if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) { @@ -1735,10 +1691,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in } else if (lhs->type == HLSL_IR_SWIZZLE) { - struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs), *new_swizzle; + struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); unsigned int width, s = swizzle->swizzle; + struct hlsl_ir_node *new_swizzle; - if 
(lhs->data_type->type == HLSL_CLASS_MATRIX) + if (lhs->data_type->class == HLSL_CLASS_MATRIX) hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); if (!invert_swizzle(&s, &writemask, &width)) @@ -1751,10 +1708,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in { return NULL; } - list_add_tail(instrs, &new_swizzle->node.entry); + list_add_tail(instrs, &new_swizzle->entry); lhs = swizzle->val.node; - rhs = &new_swizzle->node; + rhs = new_swizzle; } else { @@ -1763,18 +1720,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in } } - if (lhs->type == HLSL_IR_RESOURCE_LOAD) + if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_resource_access(hlsl_ir_index(lhs))) { - struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(lhs); - struct hlsl_ir_resource_store *store; + struct hlsl_ir_node *coords = hlsl_ir_index(lhs)->idx.node; + struct hlsl_deref resource_deref; struct hlsl_type *resource_type; - struct hlsl_ir_swizzle *coords; + struct hlsl_ir_node *store; unsigned int dim_count; - /* Such an lvalue was produced by an index expression. */ - assert(load->load_type == HLSL_RESOURCE_LOAD); - resource_type = hlsl_deref_get_type(ctx, &load->resource); - assert(resource_type->type == HLSL_CLASS_OBJECT); + if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, hlsl_ir_index(lhs)->val.node)) + return NULL; + + resource_type = hlsl_deref_get_type(ctx, &resource_deref); + assert(resource_type->class == HLSL_CLASS_OBJECT); assert(resource_type->base_type == HLSL_TYPE_TEXTURE || resource_type->base_type == HLSL_TYPE_UAV); if (resource_type->base_type != HLSL_TYPE_UAV) @@ -1787,25 +1745,70 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Resource store expressions must write to all components."); - /* Remove the (implicit) mipmap level from the load expression. 
*/ - assert(load->coords.node->data_type->type == HLSL_CLASS_VECTOR); - assert(load->coords.node->data_type->base_type == HLSL_TYPE_UINT); - assert(load->coords.node->data_type->dimx == dim_count + 1); - if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dim_count, load->coords.node, &lhs->loc))) - return NULL; - list_add_tail(instrs, &coords->node.entry); + assert(coords->data_type->class == HLSL_CLASS_VECTOR); + assert(coords->data_type->base_type == HLSL_TYPE_UINT); + assert(coords->data_type->dimx == dim_count); - if (!(store = hlsl_new_resource_store(ctx, &load->resource, &coords->node, rhs, &lhs->loc))) + if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) + { + hlsl_cleanup_deref(&resource_deref); return NULL; - list_add_tail(instrs, &store->node.entry); + } + list_add_tail(instrs, &store->entry); + hlsl_cleanup_deref(&resource_deref); + } + else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) + { + struct hlsl_ir_index *row = hlsl_ir_index(lhs); + struct hlsl_ir_node *mat = row->val.node; + unsigned int i, k = 0; + + for (i = 0; i < mat->data_type->dimx; ++i) + { + struct hlsl_ir_node *cell, *load, *store, *c; + struct hlsl_deref deref; + + if (!(writemask & (1 << i))) + continue; + + if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) + return NULL; + list_add_tail(instrs, &c->entry); + + if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) + return NULL; + list_add_tail(instrs, &cell->entry); + + if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) + return NULL; + + if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) + return NULL; + + if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load, 0, &rhs->loc))) + { + hlsl_cleanup_deref(&deref); + return NULL; + } + list_add_tail(instrs, &store->entry); + hlsl_cleanup_deref(&deref); + } } else { - struct hlsl_ir_store *store; + struct hlsl_ir_node *store; + struct hlsl_deref deref; - if (!(store 
= hlsl_new_store_index(ctx, &hlsl_ir_load(lhs)->src, NULL, rhs, writemask, &rhs->loc))) + if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) return NULL; - list_add_tail(instrs, &store->node.entry); + + if (!(store = hlsl_new_store_index(ctx, &deref, NULL, rhs, writemask, &rhs->loc))) + { + hlsl_cleanup_deref(&deref); + return NULL; + } + list_add_tail(instrs, &store->entry); + hlsl_cleanup_deref(&deref); } /* Don't use the instruction itself as a source, as this makes structure @@ -1813,37 +1816,37 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in * the last instruction in the list, we do need to copy. */ if (!(copy = hlsl_new_copy(ctx, rhs))) return NULL; - list_add_tail(instrs, &copy->node.entry); - return &copy->node; + list_add_tail(instrs, &copy->entry); + return copy; } static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrement, bool post, - struct vkd3d_shader_location loc) + const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *lhs = node_from_list(instrs); - struct hlsl_ir_constant *one; + struct hlsl_ir_node *one; if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) - hlsl_error(ctx, &loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? "de" : "in"); - if (!(one = hlsl_new_int_constant(ctx, 1, &loc))) + if (!(one = hlsl_new_int_constant(ctx, 1, loc))) return false; - list_add_tail(instrs, &one->node.entry); + list_add_tail(instrs, &one->entry); - if (!add_assignment(ctx, instrs, lhs, decrement ? ASSIGN_OP_SUB : ASSIGN_OP_ADD, &one->node)) + if (!add_assignment(ctx, instrs, lhs, decrement ?
ASSIGN_OP_SUB : ASSIGN_OP_ADD, one)) return false; if (post) { - struct hlsl_ir_expr *copy; + struct hlsl_ir_node *copy; if (!(copy = hlsl_new_copy(ctx, lhs))) return false; - list_add_tail(instrs, &copy->node.entry); + list_add_tail(instrs, &copy->entry); /* Post increment/decrement expressions are considered const. */ - if (!(copy->node.data_type = hlsl_type_clone(ctx, copy->node.data_type, 0, HLSL_MODIFIER_CONST))) + if (!(copy->data_type = hlsl_type_clone(ctx, copy->data_type, 0, HLSL_MODIFIER_CONST))) return false; } @@ -1861,10 +1864,8 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, for (k = 0; k < src_comp_count; ++k) { + struct hlsl_ir_node *conv, *load; struct hlsl_type *dst_comp_type; - struct hlsl_ir_store *store; - struct hlsl_ir_load *load; - struct hlsl_ir_node *conv; struct hlsl_block block; if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) @@ -1872,10 +1873,10 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - if (!(conv = add_implicit_conversion(ctx, instrs, &load->node, dst_comp_type, &src->loc))) + if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) return; - if (!(store = hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv))) + if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) return; list_move_tail(instrs, &block.instrs); @@ -1885,12 +1886,12 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_struct) { - if (type->type == HLSL_CLASS_OBJECT) + if (type->class == HLSL_CLASS_OBJECT) return !must_be_in_struct; - if (type->type == HLSL_CLASS_ARRAY) + if (type->class == HLSL_CLASS_ARRAY) return type_has_object_components(type->e.array.type, must_be_in_struct); - if (type->type == HLSL_CLASS_STRUCT) + if
(type->class == HLSL_CLASS_STRUCT) { unsigned int i; @@ -1905,12 +1906,12 @@ static bool type_has_object_components(struct hlsl_type *type, bool must_be_in_s static bool type_has_numeric_components(struct hlsl_type *type) { - if (type->type <= HLSL_CLASS_LAST_NUMERIC) + if (type->class <= HLSL_CLASS_LAST_NUMERIC) return true; - if (type->type == HLSL_CLASS_ARRAY) + if (type->class == HLSL_CLASS_ARRAY) return type_has_numeric_components(type->e.array.type); - if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { unsigned int i; @@ -1934,7 +1935,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t struct hlsl_type *type; bool local = true; - if (basic_type->type == HLSL_CLASS_MATRIX) + if (basic_type->class == HLSL_CLASS_MATRIX) assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); if (!(statements_list = make_empty_list(ctx))) @@ -1966,7 +1967,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t type = basic_type; - if (shader_is_sm_5_1(ctx) && type->type == HLSL_CLASS_OBJECT) + if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) { for (i = 0; i < v->arrays.count; ++i) unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); @@ -2035,7 +2036,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } vkd3d_free(v->arrays.sizes); - if (!(var = hlsl_new_var(ctx, v->name, type, v->loc, &v->semantic, modifiers, &v->reg_reservation))) + if (!(var = hlsl_new_var(ctx, v->name, type, &v->loc, &v->semantic, modifiers, &v->reg_reservation))) { free_parse_variable_def(v); continue; @@ -2043,6 +2044,13 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t var->buffer = ctx->cur_buffer; + if (var->buffer == ctx->globals_buffer) + { + if (var->reg_reservation.offset_type) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is only allowed inside 
constant buffer declarations."); + } + if (ctx->cur_scope == ctx->globals) { local = false; @@ -2148,7 +2156,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } else { - struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, var->loc); + struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); assert(v->initializer.args_count == 1); list_add_tail(v->initializer.instrs, &load->node.entry); @@ -2156,7 +2164,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } if (modifiers & HLSL_STORAGE_STATIC) - list_move_tail(&ctx->static_initializers, v->initializer.instrs); + list_move_tail(&ctx->static_initializers.instrs, v->initializer.instrs); else list_move_tail(statements_list, v->initializer.instrs); vkd3d_free(v->initializer.args); @@ -2164,9 +2172,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { - struct hlsl_ir_constant *zero; - struct hlsl_ir_store *store; - struct hlsl_ir_node *cast; + struct hlsl_ir_node *cast, *store, *zero; /* Initialize statics to zero by default. 
*/ @@ -2181,9 +2187,9 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t vkd3d_free(v); continue; } - list_add_tail(&ctx->static_initializers, &zero->node.entry); + hlsl_block_add_instr(&ctx->static_initializers, zero); - if (!(cast = add_cast(ctx, &ctx->static_initializers, &zero->node, var->data_type, &var->loc))) + if (!(cast = add_cast(ctx, &ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) { vkd3d_free(v); continue; @@ -2194,7 +2200,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t vkd3d_free(v); continue; } - list_add_tail(&ctx->static_initializers, &store->node.entry); + hlsl_block_add_instr(&ctx->static_initializers, store); } vkd3d_free(v); } @@ -2279,7 +2285,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, if (type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF) return arg; - type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); return add_implicit_conversion(ctx, params->instrs, arg, type, loc); } @@ -2315,12 +2321,12 @@ static struct hlsl_type *elementwise_intrinsic_get_common_type(struct hlsl_ctx * base = expr_common_base_type(base, arg_type->base_type); - if (arg_type->type == HLSL_CLASS_VECTOR) + if (arg_type->class == HLSL_CLASS_VECTOR) { vectors = true; dimx = min(dimx, arg_type->dimx); } - else if (arg_type->type == HLSL_CLASS_MATRIX) + else if (arg_type->class == HLSL_CLASS_MATRIX) { matrices = true; dimx = min(dimx, arg_type->dimx); @@ -2369,7 +2375,7 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; - type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, 
type->dimy); return convert_args(ctx, params, type, loc); } @@ -2383,20 +2389,18 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx, static bool intrinsic_all(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *arg = params->args[0], *mul; - struct hlsl_ir_constant *one, *zero; - struct hlsl_ir_load *load; + struct hlsl_ir_node *arg = params->args[0], *mul, *one, *zero, *load; unsigned int i, count; if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) return false; - list_add_tail(params->instrs, &one->node.entry); + list_add_tail(params->instrs, &one->entry); if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->node.entry); + list_add_tail(params->instrs, &zero->entry); - mul = &one->node; + mul = one; count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) @@ -2404,52 +2408,123 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) return false; - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &load->node, mul, loc))) + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) return false; } - return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, &zero->node, loc); -} - -/* Find the type corresponding to the given source type, with the same - * dimensions but a different base type. 
*/ -static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, - const struct hlsl_type *type, enum hlsl_base_type base_type) -{ - return hlsl_get_numeric_type(ctx, type->type, base_type, type->dimx, type->dimy); + return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); } -static bool intrinsic_asuint(struct hlsl_ctx *ctx, +static bool intrinsic_any(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_type *data_type; + struct hlsl_ir_node *arg = params->args[0], *dot, *or, *zero, *bfalse, *load; + unsigned int i, count; - if (params->args_count != 1 && params->args_count != 3) + if (arg->data_type->class != HLSL_CLASS_VECTOR && arg->data_type->class != HLSL_CLASS_SCALAR) { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to function 'asuint': expected 1 or 3, but got %u.", params->args_count); + hlsl_fixme(ctx, loc, "any() implementation for non-vector, non-scalar"); return false; } - if (params->args_count == 3) + if (arg->data_type->base_type == HLSL_TYPE_FLOAT) { - hlsl_fixme(ctx, loc, "Double-to-integer conversion."); - return false; - } + if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) + return false; + list_add_tail(params->instrs, &zero->entry); - data_type = params->args[0]->data_type; - if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) - { - struct vkd3d_string_buffer *string; + if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) + return false; - if ((string = hlsl_type_to_string(ctx, data_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 0 of asuint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", - string->buffer); - hlsl_release_string_buffer(ctx, string); + return !!add_binary_comparison_expr(ctx, 
params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); } - data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); + else if (arg->data_type->base_type == HLSL_TYPE_BOOL) + { + if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) + return false; + list_add_tail(params->instrs, &bfalse->entry); + + or = bfalse; + + count = hlsl_type_component_count(arg->data_type); + for (i = 0; i < count; ++i) + { + if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + return false; + + if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) + return false; + } + + return true; + } + + hlsl_fixme(ctx, loc, "any() implementation for non-float, non-bool"); + return false; +} + +/* Find the type corresponding to the given source type, with the same + * dimensions but a different base type. */ +static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx, + const struct hlsl_type *type, enum hlsl_base_type base_type) +{ + return hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy); +} + +static bool intrinsic_asfloat(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *data_type; + + data_type = params->args[0]->data_type; + if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, data_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong argument type of asfloat(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + } + data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_FLOAT); + + operands[0] = params->args[0]; + return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); +} + +static bool intrinsic_asuint(struct 
hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *data_type; + + if (params->args_count != 1 && params->args_count != 3) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to function 'asuint': expected 1 or 3, but got %u.", params->args_count); + return false; + } + + if (params->args_count == 3) + { + hlsl_fixme(ctx, loc, "Double-to-integer conversion."); + return false; + } + + data_type = params->args[0]->data_type; + if (data_type->base_type == HLSL_TYPE_BOOL || data_type->base_type == HLSL_TYPE_DOUBLE) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, data_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of asuint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + } + data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); operands[0] = params->args[0]; return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); @@ -2483,7 +2558,7 @@ static bool intrinsic_cos(struct hlsl_ctx *ctx, static bool intrinsic_cross(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_swizzle *arg1_swzl1, *arg1_swzl2, *arg2_swzl1, *arg2_swzl2; + struct hlsl_ir_node *arg1_swzl1, *arg1_swzl2, *arg2_swzl1, *arg2_swzl2; struct hlsl_ir_node *arg1 = params->args[0], *arg2 = params->args[1]; struct hlsl_ir_node *arg1_cast, *arg2_cast, *mul1_neg, *mul1, *mul2; struct hlsl_type *cast_type; @@ -2504,35 +2579,55 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl1->node.entry); + list_add_tail(params->instrs, 
&arg1_swzl1->entry); if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl1->node.entry); + list_add_tail(params->instrs, &arg2_swzl1->entry); - if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, - &arg1_swzl1->node, &arg2_swzl1->node, loc))) + if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) return false; - if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, *loc))) + if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, loc))) return false; list_add_tail(params->instrs, &mul1_neg->entry); if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl2->node.entry); + list_add_tail(params->instrs, &arg1_swzl2->entry); if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl2->node.entry); + list_add_tail(params->instrs, &arg2_swzl2->entry); - if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, - &arg1_swzl2->node, &arg2_swzl2->node, loc))) + if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) return false; return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, mul2, mul1_neg, loc); } +static bool intrinsic_ddx(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); +} + +static bool intrinsic_ddy(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = 
intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2565,8 +2660,7 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, static bool intrinsic_exp(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_constant *coeff; - struct hlsl_ir_node *arg, *mul; + struct hlsl_ir_node *arg, *mul, *coeff; if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) return false; @@ -2574,9 +2668,9 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, /* 1/ln(2) */ if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) return false; - list_add_tail(params->instrs, &coeff->node.entry); + list_add_tail(params->instrs, &coeff->entry); - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &coeff->node, params->args[0], loc))) + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) return false; return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, mul, loc); @@ -2604,6 +2698,43 @@ static bool intrinsic_floor(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FLOOR, arg, loc); } +static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *x, *y, *div, *abs, *frac, *neg_frac, *ge, *select, *zero; + static const struct hlsl_constant_value zero_value; + + if (!(x = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + if (!(y = intrinsic_float_convert_arg(ctx, params, params->args[1], loc))) + return false; + + if (!(div = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, x, y, loc))) + 
return false; + + if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) + return false; + list_add_tail(params->instrs, &zero->entry); + + if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) + return false; + + if (!(frac = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_FRACT, abs, loc))) + return false; + + if (!(neg_frac = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, frac, loc))) + return false; + + if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, div, zero, loc))) + return false; + + if (!(select = hlsl_add_conditional(ctx, params->instrs, ge, frac, neg_frac))) + return false; + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, select, y, loc); +} + static bool intrinsic_frac(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2635,7 +2766,7 @@ static bool intrinsic_length(struct hlsl_ctx *ctx, struct hlsl_type *type = params->args[0]->data_type; struct hlsl_ir_node *arg, *dot; - if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string; @@ -2692,20 +2823,18 @@ static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, static bool intrinsic_lit(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *n_l_neg, *n_h_neg, *specular_or, *specular_pow; - struct hlsl_ir_constant *init, *zero; - struct hlsl_ir_node *n_l, *n_h, *m; - struct hlsl_ir_node *diffuse; - struct hlsl_ir_store *store; + struct hlsl_ir_node *n_l_neg, *n_h_neg, *specular_or, *specular_pow, *load; + struct hlsl_ir_node *n_l, *n_h, *m, *diffuse, *zero, *store, *init; + struct hlsl_constant_value init_value; + struct hlsl_ir_load *var_load; struct hlsl_deref var_deref; struct hlsl_type *ret_type; - struct hlsl_ir_load *load; struct hlsl_ir_var *var; struct hlsl_block block; - if 
(params->args[0]->data_type->type != HLSL_CLASS_SCALAR - || params->args[1]->data_type->type != HLSL_CLASS_SCALAR - || params->args[2]->data_type->type != HLSL_CLASS_SCALAR) + if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR + || params->args[1]->data_type->class != HLSL_CLASS_SCALAR + || params->args[2]->data_type->class != HLSL_CLASS_SCALAR) { hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); return false; @@ -2726,37 +2855,35 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, return false; hlsl_init_simple_deref_from_var(&var_deref, var); - if (!(init = hlsl_new_constant(ctx, ret_type, loc))) + init_value.u[0].f = 1.0f; + init_value.u[1].f = 0.0f; + init_value.u[2].f = 0.0f; + init_value.u[3].f = 1.0f; + if (!(init = hlsl_new_constant(ctx, ret_type, &init_value, loc))) return false; - init->value[0].f = 1.0f; - init->value[1].f = 0.0f; - init->value[2].f = 0.0f; - init->value[3].f = 1.0f; - list_add_tail(params->instrs, &init->node.entry); + list_add_tail(params->instrs, &init->entry); - if (!(store = hlsl_new_simple_store(ctx, var, &init->node))) + if (!(store = hlsl_new_simple_store(ctx, var, init))) return false; - list_add_tail(params->instrs, &store->node.entry); + list_add_tail(params->instrs, &store->entry); if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->node.entry); + list_add_tail(params->instrs, &zero->entry); /* Diffuse component. */ - if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, &zero->node, loc))) + if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) return false; - if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) return false; list_move_tail(params->instrs, &block.instrs); /* Specular component. 
*/ - if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, - n_h, &zero->node, loc))) + if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_h, zero, loc))) return false; - if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, - n_l, &zero->node, loc))) + if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_l, zero, loc))) return false; if (!(specular_or = add_binary_logical_expr(ctx, params->instrs, HLSL_OP2_LOGIC_OR, n_l_neg, n_h_neg, loc))) @@ -2765,20 +2892,67 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) return false; - if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, &zero->node, specular_pow))) + if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, zero, specular_pow))) return false; - if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, 2, &load->node))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) return false; list_move_tail(params->instrs, &block.instrs); - if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); + list_add_tail(params->instrs, &var_load->node.entry); return true; } +static bool intrinsic_log(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *log, *arg, *coeff; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) + return false; + + /* ln(2) */ + if (!(coeff = hlsl_new_float_constant(ctx, 0.69314718055f, loc))) + return false; + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); +} + +static bool intrinsic_log10(struct hlsl_ctx *ctx, + 
const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *log, *arg, *coeff; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + if (!(log = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc))) + return false; + + /* 1 / log2(10) */ + if (!(coeff = hlsl_new_float_constant(ctx, 0.301029996f, loc))) + return false; + + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, log, coeff, loc); +} + +static bool intrinsic_log2(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); +} + static bool intrinsic_max(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2808,15 +2982,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, struct hlsl_ir_load *load; struct hlsl_ir_var *var; - if (arg1->data_type->type == HLSL_CLASS_SCALAR || arg2->data_type->type == HLSL_CLASS_SCALAR) + if (arg1->data_type->class == HLSL_CLASS_SCALAR || arg2->data_type->class == HLSL_CLASS_SCALAR) return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1, arg2, loc); - if (arg1->data_type->type == HLSL_CLASS_VECTOR) + if (arg1->data_type->class == HLSL_CLASS_VECTOR) { vect_count++; cast_type1 = hlsl_get_matrix_type(ctx, base, arg1->data_type->dimx, 1); } - if (arg2->data_type->type == HLSL_CLASS_VECTOR) + if (arg2->data_type->class == HLSL_CLASS_VECTOR) { vect_count++; cast_type2 = hlsl_get_matrix_type(ctx, base, 1, arg2->data_type->dimx); @@ -2854,13 +3028,11 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, for (j = 0; j < matrix_type->dimy; ++j) { struct hlsl_ir_node *instr = NULL; - struct hlsl_ir_store *store; struct hlsl_block 
block; for (k = 0; k < cast_type1->dimx && k < cast_type2->dimy; ++k) { - struct hlsl_ir_load *value1, *value2; - struct hlsl_ir_node *mul; + struct hlsl_ir_node *value1, *value2, *mul; if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) return false; @@ -2868,7 +3040,7 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) return false; - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &value1->node, &value2->node, loc))) + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) return false; if (instr) @@ -2882,13 +3054,13 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, } } - if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) return false; list_move_tail(params->instrs, &block.instrs); } } - if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(load = hlsl_new_var_load(ctx, var, loc))) return false; list_add_tail(params->instrs, &load->node.entry); @@ -2901,7 +3073,7 @@ static bool intrinsic_normalize(struct hlsl_ctx *ctx, struct hlsl_type *type = params->args[0]->data_type; struct hlsl_ir_node *dot, *rsq, *arg; - if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer *string; @@ -2986,6 +3158,42 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, arg, loc); } +static bool intrinsic_sign(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *lt, *neg, *op1, *op2, *zero, *arg = params->args[0]; + static const struct hlsl_constant_value zero_value; + + struct hlsl_type *int_type = hlsl_get_numeric_type(ctx, 
arg->data_type->class, HLSL_TYPE_INT, + arg->data_type->dimx, arg->data_type->dimy); + + if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) + return false; + list_add_tail(params->instrs, &zero->entry); + + /* Check if 0 < arg, cast bool to int */ + + if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, zero, arg, loc))) + return false; + + if (!(op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) + return false; + + /* Check if arg < 0, cast bool to int and invert (meaning true is -1) */ + + if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, arg, zero, loc))) + return false; + + if (!(op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) + return false; + + if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, op2, loc))) + return false; + + /* Adding these two together will make 1 when > 0, -1 when < 0, and 0 when neither */ + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, neg, op1, loc); +} + static bool intrinsic_sin(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3001,8 +3209,7 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res; - struct hlsl_ir_constant *one, *minus_two, *three; + struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res, *one, *minus_two, *three; if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; @@ -3022,9 +3229,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) return false; - list_add_tail(params->instrs, &one->node.entry); + list_add_tail(params->instrs, &one->entry); - if 
(!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, &one->node, p_denom, loc))) + if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) return false; if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p_num, p_denom, loc))) @@ -3035,16 +3242,16 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) return false; - list_add_tail(params->instrs, &minus_two->node.entry); + list_add_tail(params->instrs, &minus_two->entry); if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) return false; - list_add_tail(params->instrs, &three->node.entry); + list_add_tail(params->instrs, &three->entry); - if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &minus_two->node, p, loc))) + if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) return false; - if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, &three->node, res, loc))) + if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, three, res, loc))) return false; if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, p, loc))) @@ -3081,7 +3288,7 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, return false; type = ge->data_type; - type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); } @@ -3090,9 +3297,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * { struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; - struct hlsl_ir_node *coords; + struct hlsl_ir_node *coords, *load; if 
(params->args_count != 2 && params->args_count != 4) { @@ -3107,7 +3312,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * } sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) { struct vkd3d_string_buffer *string; @@ -3118,24 +3323,19 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * name, ctx->builtin_types.sampler[dim]->name, string->buffer); hlsl_release_string_buffer(ctx, string); } - else - { - /* Only HLSL_IR_LOAD can return an object. */ - sampler_load = hlsl_ir_load(params->args[0]); - - load_params.resource = sampler_load->src; - } if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) coords = params->args[1]; load_params.coords = coords; + load_params.resource = params->args[0]; load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + load_params.sampling_dim = dim; if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); + list_add_tail(params->instrs, &load->entry); return true; } @@ -3156,13 +3356,14 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { struct hlsl_ir_node *arg = params->args[0]; struct hlsl_type *arg_type = arg->data_type; + struct hlsl_ir_load *var_load; struct hlsl_deref var_deref; struct hlsl_type *mat_type; - struct hlsl_ir_load *load; + struct hlsl_ir_node *load; struct hlsl_ir_var *var; unsigned int i, j; - if (arg_type->type != HLSL_CLASS_SCALAR && arg_type->type != HLSL_CLASS_MATRIX) + if (arg_type->class != HLSL_CLASS_SCALAR && arg_type->class != HLSL_CLASS_MATRIX) { struct vkd3d_string_buffer 
*string; @@ -3174,7 +3375,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, return false; } - if (arg_type->type == HLSL_CLASS_SCALAR) + if (arg_type->class == HLSL_CLASS_SCALAR) { list_add_tail(params->instrs, &arg->entry); return true; @@ -3190,21 +3391,75 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { for (j = 0; j < arg_type->dimy; ++j) { - struct hlsl_ir_store *store; struct hlsl_block block; if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) return false; - if (!(store = hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, &load->node))) + if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) return false; list_move_tail(params->instrs, &block.instrs); } } - if (!(load = hlsl_new_var_load(ctx, var, *loc))) + if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); + list_add_tail(params->instrs, &var_load->node.entry); + + return true; +} + +static bool intrinsic_trunc(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, arg, loc); +} + +static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg = params->args[0], *ret, *c, *swizzle; + struct hlsl_type *arg_type = arg->data_type; + + if (arg_type->class != HLSL_CLASS_SCALAR && !(arg_type->class == HLSL_CLASS_VECTOR && arg_type->dimx == 4)) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, arg_type))) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Wrong argument type '%s'.", string->buffer); + 
hlsl_release_string_buffer(ctx, string); + } + + return false; + } + + if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) + return false; + + if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) + return false; + list_add_tail(params->instrs, &c->entry); + + if (arg_type->class == HLSL_CLASS_VECTOR) + { + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) + return false; + list_add_tail(params->instrs, &swizzle->entry); + + arg = swizzle; + } + + if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) + return false; + + if (ctx->profile->major_version >= 4) + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); return true; } @@ -3220,22 +3475,31 @@ static const struct intrinsic_function intrinsic_functions[] = { /* Note: these entries should be kept in alphabetical order. */ + {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, {"abs", 1, true, intrinsic_abs}, {"all", 1, true, intrinsic_all}, + {"any", 1, true, intrinsic_any}, + {"asfloat", 1, true, intrinsic_asfloat}, {"asuint", -1, true, intrinsic_asuint}, {"clamp", 3, true, intrinsic_clamp}, {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, + {"ddx", 1, true, intrinsic_ddx}, + {"ddy", 1, true, intrinsic_ddy}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, {"exp2", 1, true, intrinsic_exp2}, {"floor", 1, true, intrinsic_floor}, + {"fmod", 2, true, intrinsic_fmod}, {"frac", 1, true, intrinsic_frac}, {"ldexp", 2, true, intrinsic_ldexp}, {"length", 1, true, intrinsic_length}, {"lerp", 3, true, intrinsic_lerp}, {"lit", 3, true, intrinsic_lit}, + {"log", 1, true, intrinsic_log}, + {"log10", 1, true, intrinsic_log10}, + {"log2", 1, true, intrinsic_log2}, {"max", 2, true, intrinsic_max}, {"min", 2, true, intrinsic_min}, {"mul", 2, true, intrinsic_mul}, @@ -3245,6 +3509,7 @@ intrinsic_functions[] = {"round", 
1, true, intrinsic_round}, {"rsqrt", 1, true, intrinsic_rsqrt}, {"saturate", 1, true, intrinsic_saturate}, + {"sign", 1, true, intrinsic_sign}, {"sin", 1, true, intrinsic_sin}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, @@ -3252,6 +3517,7 @@ intrinsic_functions[] = {"tex2D", -1, false, intrinsic_tex2D}, {"tex3D", -1, false, intrinsic_tex3D}, {"transpose", 1, true, intrinsic_transpose}, + {"trunc", 1, true, intrinsic_trunc}, }; static int intrinsic_function_name_compare(const void *a, const void *b) @@ -3291,11 +3557,11 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, if (param->storage_modifiers & HLSL_STORAGE_IN) { - struct hlsl_ir_store *store; + struct hlsl_ir_node *store; if (!(store = hlsl_new_simple_store(ctx, param, arg))) goto fail; - list_add_tail(args->instrs, &store->node.entry); + list_add_tail(args->instrs, &store->entry); } } @@ -3316,7 +3582,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Output argument to \"%s\" is const.", decl->func->name); - if (!(load = hlsl_new_var_load(ctx, param, arg->loc))) + if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) goto fail; list_add_tail(args->instrs, &load->node.entry); @@ -3329,7 +3595,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, { struct hlsl_ir_load *load; - if (!(load = hlsl_new_var_load(ctx, decl->return_var, *loc))) + if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) goto fail; list_add_tail(args->instrs, &load->node.entry); } @@ -3360,7 +3626,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, for (i = 0; i < args->args_count; ++i) { - if (args->args[i]->data_type->type > HLSL_CLASS_LAST_NUMERIC) + if (args->args[i]->data_type->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string; @@ -3397,20 +3663,20 @@ fail: } static struct list *add_constructor(struct hlsl_ctx 
*ctx, struct hlsl_type *type, - struct parse_initializer *params, struct vkd3d_shader_location loc) + struct parse_initializer *params, const struct vkd3d_shader_location *loc) { struct hlsl_ir_load *load; struct hlsl_ir_var *var; unsigned int i, idx = 0; - if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, &loc))) + if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) return NULL; for (i = 0; i < params->args_count; ++i) { struct hlsl_ir_node *arg = params->args[i]; - if (arg->data_type->type == HLSL_CLASS_OBJECT) + if (arg->data_type->class == HLSL_CLASS_OBJECT) { struct vkd3d_string_buffer *string; @@ -3455,320 +3721,526 @@ static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) } } -static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -{ - const struct hlsl_type *object_type = object->data_type; - struct hlsl_ir_load *object_load; +static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct hlsl_type *object_type, + const char *method, const struct vkd3d_shader_location *loc) +{ + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, object_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "Method '%s' is not defined on type '%s'.", method, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; +} + +static bool add_load_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = {.type = 
HLSL_RESOURCE_LOAD}; + struct hlsl_ir_node *load; + bool multisampled; + + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBE + || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBEARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } + + multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + + if (params->args_count < 1 + multisampled || params->args_count > 3 + multisampled) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method 'Load': expected between %u and %u, but got %u.", + 1 + multisampled, 3 + multisampled, params->args_count); + return false; + } + if (multisampled) + { + if (!(load_params.sample_index = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc))) + return false; + } + + assert(offset_dim); + if (params->args_count > 1 + multisampled) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + if (params->args_count > 2 + multisampled) + { + hlsl_fixme(ctx, loc, "Tiled resource status argument."); + } + + /* +1 for the mipmap level for non-multisampled textures */ + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + !multisampled), loc))) + return false; + + load_params.format = object_type->e.resource_format; + load_params.resource = object; + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + return true; +} + +static bool add_sample_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type 
*object_type = object->data_type; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; + + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } + + if (params->args_count < 2 || params->args_count > 4 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method 'Sample': expected from 2 to %u, but got %u.", + 4 + !!offset_dim, params->args_count); + return false; + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + + if (offset_dim && params->args_count > 2) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + + if (params->args_count > 2 + !!offset_dim) + hlsl_fixme(ctx, loc, "Sample() clamp parameter."); + if (params->args_count > 3 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument."); + + load_params.format = 
object_type->e.resource_format; + load_params.resource = object; + load_params.sampler = params->args[0]; + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + + return true; +} + +static bool add_sample_cmp_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = { 0 }; + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; + + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } + + if (!strcmp(name, "SampleCmpLevelZero")) + load_params.type = HLSL_RESOURCE_SAMPLE_CMP_LZ; + else + load_params.type = HLSL_RESOURCE_SAMPLE_CMP; + + if (params->args_count < 3 || params->args_count > 5 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", + name, 5 + !!offset_dim, params->args_count); + return false; + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_COMPARISON) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of %s(): expected 'SamplerComparisonState', but got '%s'.", + name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return 
false; + } + + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; + + if (!(load_params.cmp = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) + load_params.cmp = params->args[2]; + + if (offset_dim && params->args_count > 3) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + + if (params->args_count > 3 + !!offset_dim) + hlsl_fixme(ctx, loc, "%s() clamp parameter.", name); + if (params->args_count > 4 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument."); + + load_params.format = object_type->e.resource_format; + load_params.resource = object; + load_params.sampler = params->args[0]; + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + + return true; +} + +static bool add_gather_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + struct hlsl_resource_load_params load_params = {0}; + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; + unsigned int read_channel; + + if (object_type->sampler_dim != HLSL_SAMPLER_DIM_2D + && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DARRAY + && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBE + && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBEARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } + + if (!strcmp(name, "GatherGreen")) + { + 
load_params.type = HLSL_RESOURCE_GATHER_GREEN; + read_channel = 1; + } + else if (!strcmp(name, "GatherBlue")) + { + load_params.type = HLSL_RESOURCE_GATHER_BLUE; + read_channel = 2; + } + else if (!strcmp(name, "GatherAlpha")) + { + load_params.type = HLSL_RESOURCE_GATHER_ALPHA; + read_channel = 3; + } + else + { + load_params.type = HLSL_RESOURCE_GATHER_RED; + read_channel = 0; + } + + if (!strcmp(name, "Gather") || !offset_dim) + { + if (params->args_count < 2 || params->args_count > 3 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 2 to %u, but got %u.", + name, 3 + !!offset_dim, params->args_count); + return false; + } + } + else if (params->args_count < 2 || params->args_count == 5 || params->args_count > 7) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected 2, 3, 4, 6 or 7, but got %u.", + name, params->args_count); + return false; + } + + if (params->args_count == 3 + !!offset_dim || params->args_count == 7) + hlsl_fixme(ctx, loc, "Tiled resource status argument."); - if (object_type->type != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE - || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + if (params->args_count == 6 || params->args_count == 7) + { + hlsl_fixme(ctx, loc, "Multiple %s() offset parameters.", name); + } + else if (offset_dim && params->args_count > 2) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) + return false; + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string; - if ((string = hlsl_type_to_string(ctx, object_type))) + if ((string = 
hlsl_type_to_string(ctx, sampler_type))) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Type '%s' does not have methods.", string->buffer); + "Wrong type for argument 1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); hlsl_release_string_buffer(ctx, string); return false; } - /* Only HLSL_IR_LOAD can return an object. */ - object_load = hlsl_ir_load(object); - - if (!strcmp(name, "Load") - && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBE - && object_type->sampler_dim != HLSL_SAMPLER_DIM_CUBEARRAY) + if (read_channel >= object_type->e.resource_format->dimx) { - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; - struct hlsl_ir_resource_load *load; - bool multisampled; - - multisampled = object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Method %s() requires at least %u channels.", name, read_channel + 1); + return false; + } - if (params->args_count < 1 + multisampled || params->args_count > 3 + multisampled) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method 'Load': expected between %u and %u, but got %u.", - 1 + multisampled, 3 + multisampled, params->args_count); - return false; - } - if (multisampled) - { - hlsl_fixme(ctx, loc, "Load() sampling index parameter."); - } + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + return false; - assert(offset_dim); - if (params->args_count > 1 + multisampled) - { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[1 + multisampled], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, 
offset_dim), loc))) - return false; - } - if (params->args_count > 2 + multisampled) - { - hlsl_fixme(ctx, loc, "Tiled resource status argument."); - } + load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); + load_params.resource = object; + load_params.sampler = params->args[0]; - /* +1 for the mipmap level */ - if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[0], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + 1), loc))) - return false; + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + return true; +} - load_params.format = object_type->e.resource_format; - load_params.resource = object_load->src; +static bool add_sample_lod_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + struct hlsl_resource_load_params load_params = { 0 }; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); - return true; + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); } - else if (!strcmp(name, "Sample") - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) + + if (!strcmp(name, "SampleLevel")) + load_params.type = HLSL_RESOURCE_SAMPLE_LOD; + else + load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; + + if (params->args_count < 3 || 
params->args_count > 4 + !!offset_dim) { - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; - const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 3 to %u, but got %u.", + name, 4 + !!offset_dim, params->args_count); + return false; + } - if (params->args_count < 2 || params->args_count > 4 + !!offset_dim) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method 'Sample': expected from 2 to %u, but got %u.", - 4 + !!offset_dim, params->args_count); - return false; - } + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; - sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } - if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 0 of Sample(): expected 'sampler', but got '%s'.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } + if (!(load_params.coords = 
add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.coords = params->args[1]; - /* Only HLSL_IR_LOAD can return an object. */ - sampler_load = hlsl_ir_load(params->args[0]); + if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) + load_params.lod = params->args[2]; - if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + if (offset_dim && params->args_count > 3) + { + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; + } - if (offset_dim && params->args_count > 2) - { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } + if (params->args_count > 3 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument."); - if (params->args_count > 2 + !!offset_dim) - hlsl_fixme(ctx, loc, "Sample() clamp parameter."); - if (params->args_count > 3 + !!offset_dim) - hlsl_fixme(ctx, loc, "Tiled resource status argument."); + load_params.format = object_type->e.resource_format; + load_params.resource = object; + load_params.sampler = params->args[0]; - load_params.format = object_type->e.resource_format; - load_params.resource = object_load->src; - load_params.sampler = sampler_load->src; + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + return true; +} - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); +static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct 
parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + struct hlsl_resource_load_params load_params = { 0 }; + const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); + const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *load; - return true; - } - else if ((!strcmp(name, "Gather") || !strcmp(name, "GatherRed") || !strcmp(name, "GatherBlue") - || !strcmp(name, "GatherGreen") || !strcmp(name, "GatherAlpha")) - && (object_type->sampler_dim == HLSL_SAMPLER_DIM_2D - || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DARRAY - || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBE - || object_type->sampler_dim == HLSL_SAMPLER_DIM_CUBEARRAY)) + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || object_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) { - const unsigned int sampler_dim = hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - struct hlsl_resource_load_params load_params = {0}; - const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; - unsigned int read_channel; - - if (!strcmp(name, "GatherGreen")) - { - load_params.type = HLSL_RESOURCE_GATHER_GREEN; - read_channel = 1; - } - else if (!strcmp(name, "GatherBlue")) - { - load_params.type = HLSL_RESOURCE_GATHER_BLUE; - read_channel = 2; - } - else if (!strcmp(name, "GatherAlpha")) - { - load_params.type = HLSL_RESOURCE_GATHER_ALPHA; - read_channel = 3; - } - else - { - load_params.type = HLSL_RESOURCE_GATHER_RED; - read_channel = 0; - } - - if (!strcmp(name, "Gather") || !offset_dim) - { - if (params->args_count < 2 || params->args_count > 3 + !!offset_dim) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method '%s': 
expected from 2 to %u, but got %u.", - name, 3 + !!offset_dim, params->args_count); - return false; - } - } - else if (params->args_count < 2 || params->args_count == 5 || params->args_count > 7) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method '%s': expected 2, 3, 4, 6 or 7, but got %u.", - name, params->args_count); - return false; - } - - if (params->args_count == 3 + !!offset_dim || params->args_count == 7) - hlsl_fixme(ctx, loc, "Tiled resource status argument."); + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } - if (params->args_count == 6 || params->args_count == 7) - { - hlsl_fixme(ctx, loc, "Multiple %s() offset parameters.", name); - } - else if (offset_dim && params->args_count > 2) - { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[2], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } + load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; - sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; + if (params->args_count < 4 || params->args_count > 5 + !!offset_dim) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to method '%s': expected from 4 to %u, but got %u.", + name, 5 + !!offset_dim, params->args_count); + return false; + } - if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 1 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || 
sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { + struct vkd3d_string_buffer *string; - if (read_channel >= object_type->e.resource_format->dimx) - { + if ((string = hlsl_type_to_string(ctx, sampler_type))) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Method %s() requires at least %u channels.", name, read_channel + 1); - return false; - } + "Wrong type for argument 0 of %s(): expected 'sampler', but got '%s'.", name, string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } - /* Only HLSL_IR_LOAD can return an object. */ - sampler_load = hlsl_ir_load(params->args[0]); + if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.coords = params->args[1]; - if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - return false; + if (!(load_params.ddx = add_implicit_conversion(ctx, instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.ddx = params->args[2]; - load_params.format = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4); - load_params.resource = object_load->src; - load_params.sampler = sampler_load->src; + if (!(load_params.ddy = add_implicit_conversion(ctx, instrs, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + load_params.ddy = params->args[3]; - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); - return true; - } - else if (!strcmp(name, "SampleLevel") - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMS - && object_type->sampler_dim != HLSL_SAMPLER_DIM_2DMSARRAY) + if (offset_dim && params->args_count > 4) { - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE_LOD}; - const unsigned int sampler_dim = 
hlsl_sampler_dim_count(object_type->sampler_dim); - const unsigned int offset_dim = hlsl_offset_dim_count(object_type->sampler_dim); - const struct hlsl_type *sampler_type; - struct hlsl_ir_resource_load *load; - struct hlsl_ir_load *sampler_load; - - if (params->args_count < 3 || params->args_count > 4 + !!offset_dim) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Wrong number of arguments to method 'SampleLevel': expected from 3 to %u, but got %u.", - 4 + !!offset_dim, params->args_count); + if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[4], + hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) return false; - } - - sampler_type = params->args[0]->data_type; - if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER - || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { - struct vkd3d_string_buffer *string; + } - if ((string = hlsl_type_to_string(ctx, sampler_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 0 of SampleLevel(): expected 'sampler', but got '%s'.", string->buffer); - hlsl_release_string_buffer(ctx, string); - return false; - } + if (params->args_count > 4 + !!offset_dim) + hlsl_fixme(ctx, loc, "Tiled resource status argument."); - /* Only HLSL_IR_LOAD can return an object. 
*/ - sampler_load = hlsl_ir_load(params->args[0]); + load_params.format = object_type->e.resource_format; + load_params.resource = object; + load_params.sampler = params->args[0]; - if (!(load_params.coords = add_implicit_conversion(ctx, instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - load_params.coords = params->args[1]; + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(instrs, &load->entry); + return true; +} - if (!(load_params.lod = add_implicit_conversion(ctx, instrs, params->args[2], - hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), loc))) - load_params.lod = params->args[2]; +static const struct method_function +{ + const char *name; + bool (*handler)(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); +} +object_methods[] = +{ + { "Gather", add_gather_method_call }, + { "GatherAlpha", add_gather_method_call }, + { "GatherBlue", add_gather_method_call }, + { "GatherGreen", add_gather_method_call }, + { "GatherRed", add_gather_method_call }, + + { "Load", add_load_method_call }, + + { "Sample", add_sample_method_call }, + { "SampleBias", add_sample_lod_method_call }, + { "SampleCmp", add_sample_cmp_method_call }, + { "SampleCmpLevelZero", add_sample_cmp_method_call }, + { "SampleGrad", add_sample_grad_method_call }, + { "SampleLevel", add_sample_lod_method_call }, +}; - if (offset_dim && params->args_count > 3) - { - if (!(load_params.texel_offset = add_implicit_conversion(ctx, instrs, params->args[3], - hlsl_get_vector_type(ctx, HLSL_TYPE_INT, offset_dim), loc))) - return false; - } +static int object_method_function_name_compare(const void *a, const void *b) +{ + const struct method_function *func = b; - if (params->args_count > 3 + !!offset_dim) - hlsl_fixme(ctx, loc, "Tiled resource status argument."); + return strcmp(a, func->name); +} - 
load_params.format = object_type->e.resource_format; - load_params.resource = object_load->src; - load_params.sampler = sampler_load->src; +static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + const struct hlsl_type *object_type = object->data_type; + const struct method_function *method; - if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) - return false; - list_add_tail(instrs, &load->node.entry); - return true; - } - else + if (object_type->class != HLSL_CLASS_OBJECT || object_type->base_type != HLSL_TYPE_TEXTURE + || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string; if ((string = hlsl_type_to_string(ctx, object_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, - "Method '%s' is not defined on type '%s'.", name, string->buffer); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Type '%s' does not have methods.", string->buffer); hlsl_release_string_buffer(ctx, string); return false; } + + if ((method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), + sizeof(*method), object_method_function_name_compare))) + { + return method->handler(ctx, instrs, object, name, params, loc); + } + else + { + return raise_invalid_method_object_type(ctx, object_type, name, loc); + } } static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type *format, const struct vkd3d_shader_location *loc) { - if (format->type > HLSL_CLASS_VECTOR) + if (format->class > HLSL_CLASS_VECTOR) { struct vkd3d_string_buffer *string; @@ -3846,6 +4318,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %token KW_NAMESPACE %token KW_NOINTERPOLATION %token KW_OUT +%token KW_PACKOFFSET %token KW_PASS %token KW_PIXELSHADER %token KW_PRECISE @@ -3854,6 +4327,8 @@ static void validate_texture_format_type(struct 
hlsl_ctx *ctx, struct hlsl_type %token KW_RETURN %token KW_REGISTER %token KW_ROW_MAJOR +%token KW_RWBUFFER +%token KW_RWSTRUCTUREDBUFFER %token KW_RWTEXTURE1D %token KW_RWTEXTURE2D %token KW_RWTEXTURE3D @@ -3933,6 +4408,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type conditional_expr %type declaration %type declaration_statement +%type discard_statement %type equality_expr %type expr %type expr_optional @@ -3968,6 +4444,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type attribute %type attribute_list +%type attribute_list_optional %type boolean @@ -3999,6 +4476,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type parameters %type register_opt +%type packoffset_opt %type texture_type texture_ms_type uav_type @@ -4037,7 +4515,7 @@ buffer_declaration: if ($3.semantic.name) hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); - if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, @2))) + if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, &@2))) YYABORT; } @@ -4261,6 +4739,14 @@ attribute_list: $$.attrs[$$.count++] = $2; } +attribute_list_optional: + %empty + { + $$.count = 0; + $$.attrs = NULL; + } + | attribute_list + func_declaration: func_prototype compound_statement { @@ -4349,8 +4835,11 @@ func_prototype_no_attrs: "Semantics are not allowed on void functions."); } - if ($7.reg_reservation.type) + if ($7.reg_reservation.reg_type) FIXME("Unexpected register reservation for a function.\n"); + if ($7.reg_reservation.offset_type) + hlsl_error(ctx, &@5, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is not allowed on functions."); if (($$.decl = get_func_decl(&ctx->functions, $3, &$5))) { @@ -4476,17 +4965,24 @@ var_identifier: colon_attribute: %empty { - $$.semantic.name = NULL; - $$.reg_reservation.type = 0; + $$.semantic = (struct 
hlsl_semantic){0}; + $$.reg_reservation.reg_type = 0; + $$.reg_reservation.offset_type = 0; } | semantic { $$.semantic = $1; - $$.reg_reservation.type = 0; + $$.reg_reservation.reg_type = 0; + $$.reg_reservation.offset_type = 0; } | register_opt { - $$.semantic.name = NULL; + $$.semantic = (struct hlsl_semantic){0}; + $$.reg_reservation = $1; + } + | packoffset_opt + { + $$.semantic = (struct hlsl_semantic){0}; $$.reg_reservation = $1; } @@ -4499,6 +4995,9 @@ semantic: ; $$.name = $2; $$.index = atoi(p); + $$.reported_missing = false; + $$.reported_duplicated_output_next_index = 0; + $$.reported_duplicated_input_incompatible_next_index = 0; *p = 0; } @@ -4518,6 +5017,21 @@ register_opt: vkd3d_free($6); } +packoffset_opt: + ':' KW_PACKOFFSET '(' any_identifier ')' + { + $$ = parse_packoffset(ctx, $4, NULL, &@$); + + vkd3d_free($4); + } + | ':' KW_PACKOFFSET '(' any_identifier '.' any_identifier ')' + { + $$ = parse_packoffset(ctx, $4, $6, &@$); + + vkd3d_free($4); + vkd3d_free($6); + } + parameters: scope_start { @@ -4536,7 +5050,7 @@ param_list: parameter { memset(&$$, 0, sizeof($$)); - if (!add_func_parameter(ctx, &$$, &$1, @1)) + if (!add_func_parameter(ctx, &$$, &$1, &@1)) { ERR("Error adding function parameter %s.\n", $1.name); YYABORT; @@ -4545,7 +5059,7 @@ param_list: | param_list ',' parameter { $$ = $1; - if (!add_func_parameter(ctx, &$$, &$3, @3)) + if (!add_func_parameter(ctx, &$$, &$3, &@3)) { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "Parameter \"%s\" is already declared.", $3.name); @@ -4624,7 +5138,15 @@ texture_ms_type: } uav_type: - KW_RWTEXTURE1D + KW_RWBUFFER + { + $$ = HLSL_SAMPLER_DIM_BUFFER; + } + | KW_RWSTRUCTUREDBUFFER + { + $$ = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER; + } + | KW_RWTEXTURE1D { $$ = HLSL_SAMPLER_DIM_1D; } @@ -4640,7 +5162,7 @@ uav_type: type_no_void: KW_VECTOR '<' type ',' C_INTEGER '>' { - if ($3->type != HLSL_CLASS_SCALAR) + if ($3->class != HLSL_CLASS_SCALAR) { struct vkd3d_string_buffer *string; @@ -4667,7 
+5189,7 @@ type_no_void: } | KW_MATRIX '<' type ',' C_INTEGER ',' C_INTEGER '>' { - if ($3->type != HLSL_CLASS_SCALAR) + if ($3->class != HLSL_CLASS_SCALAR) { struct vkd3d_string_buffer *string; @@ -4702,6 +5224,10 @@ type_no_void: { $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_GENERIC]; } + | KW_SAMPLERCOMPARISONSTATE + { + $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_COMPARISON]; + } | KW_SAMPLER1D { $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_1D]; @@ -4716,7 +5242,7 @@ type_no_void: } | KW_SAMPLERCUBE { - $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_3D]; + $$ = ctx->builtin_types.sampler[HLSL_SAMPLER_DIM_CUBE]; } | KW_TEXTURE { @@ -4740,23 +5266,58 @@ type_no_void: } | texture_ms_type '<' type ',' shift_expr '>' { - unsigned int sample_count = evaluate_static_expression(node_from_list($5)); - destroy_instr_list($5); + unsigned int sample_count; + struct hlsl_block block; + + hlsl_block_init(&block); + list_move_tail(&block.instrs, $5); + + sample_count = evaluate_static_expression_as_uint(ctx, &block, &@5); + + hlsl_block_cleanup(&block); + + vkd3d_free($5); $$ = hlsl_new_texture_type(ctx, $1, $3, sample_count); } | uav_type '<' type '>' { - if ($3->type > HLSL_CLASS_VECTOR) - { - struct vkd3d_string_buffer *string; + struct vkd3d_string_buffer *string = hlsl_type_to_string(ctx, $3); - string = hlsl_type_to_string(ctx, $3); + if (!type_contains_only_numerics($3)) + { if (string) hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "UAV data type %s is not scalar or vector.", string->buffer); - hlsl_release_string_buffer(ctx, string); + "UAV type %s is not numeric.", string->buffer); + } + + switch ($1) + { + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_1D: + case HLSL_SAMPLER_DIM_2D: + case HLSL_SAMPLER_DIM_3D: + if ($3->class == HLSL_CLASS_ARRAY) + { + if (string) + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "This type of UAV does not support array type."); + } + else if (hlsl_type_component_count($3) > 4) + { 
+ if (string) + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "UAV data type %s size exceeds maximum size.", string->buffer); + } + break; + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + break; + default: + vkd3d_unreachable(); } + + hlsl_release_string_buffer(ctx, string); + $$ = hlsl_new_uav_type(ctx, $1, $3); } | TYPE_IDENTIFIER @@ -4779,7 +5340,7 @@ type_no_void: | KW_STRUCT TYPE_IDENTIFIER { $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); - if ($$->type != HLSL_CLASS_STRUCT) + if ($$->class != HLSL_CLASS_STRUCT) hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_REDEFINED, "\"%s\" redefined as a structure.", $2); vkd3d_free($2); } @@ -4934,10 +5495,17 @@ arrays: } | '[' expr ']' arrays { - unsigned int size = evaluate_static_expression(node_from_list($2)); + struct hlsl_block block; uint32_t *new_array; + unsigned int size; - destroy_instr_list($2); + hlsl_clone_block(ctx, &block, &ctx->static_initializers); + list_move_tail(&block.instrs, $2); + + size = evaluate_static_expression_as_uint(ctx, &block, &@2); + + hlsl_block_cleanup(&block); + vkd3d_free($2); $$ = $4; @@ -4988,59 +5556,59 @@ var_modifiers: } | KW_EXTERN var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_EXTERN, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_EXTERN, &@1); } | KW_NOINTERPOLATION var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOINTERPOLATION, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOINTERPOLATION, &@1); } | KW_PRECISE var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); } | KW_SHARED var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, &@1); } | KW_GROUPSHARED var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_GROUPSHARED, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_GROUPSHARED, &@1); } | KW_STATIC var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_STATIC, @1); + $$ = 
add_modifiers(ctx, $2, HLSL_STORAGE_STATIC, &@1); } | KW_UNIFORM var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_UNIFORM, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_UNIFORM, &@1); } | KW_VOLATILE var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_VOLATILE, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_VOLATILE, &@1); } | KW_CONST var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_CONST, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_CONST, &@1); } | KW_ROW_MAJOR var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_ROW_MAJOR, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_ROW_MAJOR, &@1); } | KW_COLUMN_MAJOR var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_COLUMN_MAJOR, @1); + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_COLUMN_MAJOR, &@1); } | KW_IN var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN, &@1); } | KW_OUT var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_OUT, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_OUT, &@1); } | KW_INOUT var_modifiers { - $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, @1); + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_IN | HLSL_STORAGE_OUT, &@1); } @@ -5145,6 +5713,7 @@ statement: declaration_statement | expr_statement | compound_statement + | discard_statement | jump_statement | selection_statement | loop_statement @@ -5152,7 +5721,7 @@ statement: jump_statement: KW_RETURN expr ';' { - if (!add_return(ctx, $2, node_from_list($2), @1)) + if (!add_return(ctx, $2, node_from_list($2), &@1)) YYABORT; $$ = $2; } @@ -5160,65 +5729,81 @@ jump_statement: { if (!($$ = make_empty_list(ctx))) YYABORT; - if (!add_return(ctx, $$, NULL, @1)) + if (!add_return(ctx, $$, NULL, &@1)) + YYABORT; + } + +discard_statement: + KW_DISCARD ';' + { + struct hlsl_ir_node *discard; + + if (!($$ = make_empty_list(ctx))) YYABORT; + if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) + YYABORT; /* a plain return would leave yyparse() with 0, i.e. success */ +
list_add_tail($$, &discard->entry); } selection_statement: KW_IF '(' expr ')' if_body { struct hlsl_ir_node *condition = node_from_list($3); - struct hlsl_ir_if *instr; - - if (!(instr = hlsl_new_if(ctx, condition, @1))) + struct hlsl_block then_block, else_block; + struct hlsl_ir_node *instr; + + hlsl_block_init(&then_block); + list_move_tail(&then_block.instrs, $5.then_block); + hlsl_block_init(&else_block); + if ($5.else_block) + list_move_tail(&else_block.instrs, $5.else_block); + vkd3d_free($5.then_block); + vkd3d_free($5.else_block); + + if (!(instr = hlsl_new_if(ctx, condition, &then_block, &else_block, &@1))) YYABORT; - list_move_tail(&instr->then_instrs.instrs, $5.then_instrs); - if ($5.else_instrs) - list_move_tail(&instr->else_instrs.instrs, $5.else_instrs); - vkd3d_free($5.then_instrs); - vkd3d_free($5.else_instrs); if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) { struct vkd3d_string_buffer *string; if ((string = hlsl_type_to_string(ctx, condition->data_type))) - hlsl_error(ctx, &instr->node.loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "if condition type %s is not scalar.", string->buffer); hlsl_release_string_buffer(ctx, string); } $$ = $3; - list_add_tail($$, &instr->node.entry); + list_add_tail($$, &instr->entry); } if_body: statement { - $$.then_instrs = $1; - $$.else_instrs = NULL; + $$.then_block = $1; + $$.else_block = NULL; } | statement KW_ELSE statement { - $$.then_instrs = $1; - $$.else_instrs = $3; + $$.then_block = $1; + $$.else_block = $3; } loop_statement: - KW_WHILE '(' expr ')' statement + attribute_list_optional KW_WHILE '(' expr ')' statement { - $$ = create_loop(ctx, LOOP_WHILE, NULL, $3, NULL, $5, @1); + $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $4, NULL, $6, &@2); } - | KW_DO statement KW_WHILE '(' expr ')' ';' + | attribute_list_optional KW_DO statement KW_WHILE '(' expr ')' ';' { - $$ = create_loop(ctx, LOOP_DO_WHILE, NULL, $5, NULL, $2, 
@1); + $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $6, NULL, $3, &@2); } - | KW_FOR '(' scope_start expr_statement expr_statement expr_optional ')' statement + | attribute_list_optional KW_FOR '(' scope_start expr_statement expr_statement expr_optional ')' statement { - $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); + $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); hlsl_pop_scope(ctx); } - | KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement + | attribute_list_optional KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement { - $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); + $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); hlsl_pop_scope(ctx); } @@ -5250,31 +5835,31 @@ func_arguments: primary_expr: C_FLOAT { - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c; if (!(c = hlsl_new_float_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, &c->node))) + if (!($$ = make_list(ctx, c))) YYABORT; } | C_INTEGER { - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c; if (!(c = hlsl_new_int_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, &c->node))) + if (!($$ = make_list(ctx, c))) YYABORT; } | boolean { - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c; if (!(c = hlsl_new_bool_constant(ctx, $1, &@1))) YYABORT; - if (!($$ = make_list(ctx, &c->node))) + if (!($$ = make_list(ctx, c))) { - hlsl_free_instr(&c->node); + hlsl_free_instr(c); YYABORT; } } @@ -5288,7 +5873,7 @@ primary_expr: hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Variable \"%s\" is not defined.", $1); YYABORT; } - if (!(load = hlsl_new_var_load(ctx, var, @1))) + if (!(load = hlsl_new_var_load(ctx, var, &@1))) YYABORT; if (!($$ = make_list(ctx, &load->node))) YYABORT; @@ -5316,7 +5901,7 @@ primary_expr: if (!(var = hlsl_new_synthetic_var(ctx, "state_block_expr", hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &@1))) YYABORT; - if (!(load = hlsl_new_var_load(ctx, var, @1))) + if (!(load 
= hlsl_new_var_load(ctx, var, &@1))) YYABORT; if (!($$ = make_list(ctx, &load->node))) YYABORT; @@ -5332,7 +5917,7 @@ postfix_expr: primary_expr | postfix_expr OP_INC { - if (!add_increment(ctx, $1, false, true, @2)) + if (!add_increment(ctx, $1, false, true, &@2)) { destroy_instr_list($1); YYABORT; @@ -5341,7 +5926,7 @@ postfix_expr: } | postfix_expr OP_DEC { - if (!add_increment(ctx, $1, true, true, @2)) + if (!add_increment(ctx, $1, true, true, &@2)) { destroy_instr_list($1); YYABORT; @@ -5352,7 +5937,7 @@ postfix_expr: { struct hlsl_ir_node *node = node_from_list($1); - if (node->data_type->type == HLSL_CLASS_STRUCT) + if (node->data_type->class == HLSL_CLASS_STRUCT) { struct hlsl_type *type = node->data_type; const struct hlsl_struct_field *field; @@ -5365,20 +5950,20 @@ postfix_expr: } field_idx = field - type->e.record.fields; - if (!add_record_load(ctx, $1, node, field_idx, @2)) + if (!add_record_access(ctx, $1, node, field_idx, &@2)) YYABORT; $$ = $1; } - else if (node->data_type->type <= HLSL_CLASS_LAST_NUMERIC) + else if (node->data_type->class <= HLSL_CLASS_LAST_NUMERIC) { - struct hlsl_ir_swizzle *swizzle; + struct hlsl_ir_node *swizzle; if (!(swizzle = get_swizzle(ctx, node, $3, &@3))) { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle \"%s\".", $3); YYABORT; } - list_add_tail($1, &swizzle->node.entry); + list_add_tail($1, &swizzle->entry); $$ = $1; } else @@ -5391,10 +5976,10 @@ postfix_expr: { struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); - list_move_tail($1, $3); + list_move_head($1, $3); vkd3d_free($3); - if (!add_array_load(ctx, $1, array, index, &@2)) + if (!add_array_access(ctx, $1, array, index, &@2)) { destroy_instr_list($1); YYABORT; @@ -5412,7 +5997,7 @@ postfix_expr: free_parse_initializer(&$4); YYABORT; } - if ($2->type > HLSL_CLASS_LAST_NUMERIC) + if ($2->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string; @@ -5432,7 +6017,7 @@ postfix_expr: YYABORT; } - if 
(!($$ = add_constructor(ctx, $2, &$4, @2))) + if (!($$ = add_constructor(ctx, $2, &$4, &@2))) { free_parse_initializer(&$4); YYABORT; @@ -5459,7 +6044,7 @@ unary_expr: postfix_expr | OP_INC unary_expr { - if (!add_increment(ctx, $2, false, false, @1)) + if (!add_increment(ctx, $2, false, false, &@1)) { destroy_instr_list($2); YYABORT; @@ -5468,7 +6053,7 @@ unary_expr: } | OP_DEC unary_expr { - if (!add_increment(ctx, $2, true, false, @1)) + if (!add_increment(ctx, $2, true, false, &@1)) { destroy_instr_list($2); YYABORT; @@ -5545,31 +6130,31 @@ mul_expr: unary_expr | mul_expr '*' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); } | mul_expr '/' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); } | mul_expr '%' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); } add_expr: mul_expr | add_expr '+' mul_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); } | add_expr '-' mul_expr { struct hlsl_ir_node *neg; - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), @2))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), &@2))) YYABORT; list_add_tail($3, &neg->entry); - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, @2); + $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); } shift_expr: @@ -5587,30 +6172,30 @@ relational_expr: shift_expr | relational_expr '<' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, @2); + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); } | relational_expr '>' shift_expr { - $$ = 
add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, @2); + $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); } | relational_expr OP_LE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, @2); + $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); } | relational_expr OP_GE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, @2); + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); } equality_expr: relational_expr | equality_expr OP_EQ relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, @2); + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); } | equality_expr OP_NE relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, @2); + $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); } bitand_expr: @@ -5652,7 +6237,26 @@ conditional_expr: logicor_expr | logicor_expr '?' 
expr ':' assignment_expr { - hlsl_fixme(ctx, &@$, "Ternary operator."); + struct hlsl_ir_node *cond = node_from_list($1), *first = node_from_list($3), *second = node_from_list($5); + struct hlsl_type *common_type; + + list_move_tail($1, $3); + list_move_tail($1, $5); + vkd3d_free($3); + vkd3d_free($5); + + if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) + YYABORT; + + if (!(first = add_implicit_conversion(ctx, $1, first, common_type, &@3))) + YYABORT; + + if (!(second = add_implicit_conversion(ctx, $1, second, common_type, &@5))) + YYABORT; + + if (!hlsl_add_conditional(ctx, $1, cond, first, second)) + YYABORT; + $$ = $1; } assignment_expr: diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index ab59875738c..765b1907426 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -27,11 +27,11 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str enum hlsl_regset regset, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *idx_offset = NULL; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *c; - list_init(&block->instrs); + hlsl_block_init(block); - switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: idx_offset = idx; @@ -41,11 +41,11 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str { if (!(c = hlsl_new_uint_constant(ctx, 4, loc))) return NULL; - list_add_tail(&block->instrs, &c->node.entry); + hlsl_block_add_instr(block, c); - if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, &c->node, idx))) + if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, c, idx))) return NULL; - list_add_tail(&block->instrs, &idx_offset->entry); + hlsl_block_add_instr(block, idx_offset); break; } @@ -56,25 +56,25 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str if (!(c = hlsl_new_uint_constant(ctx, size, loc))) return 
NULL; - list_add_tail(&block->instrs, &c->node.entry); + hlsl_block_add_instr(block, c); - if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, &c->node, idx))) + if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, c, idx))) return NULL; - list_add_tail(&block->instrs, &idx_offset->entry); + hlsl_block_add_instr(block, idx_offset); break; } case HLSL_CLASS_STRUCT: { - unsigned int field_idx = hlsl_ir_constant(idx)->value[0].u; + unsigned int field_idx = hlsl_ir_constant(idx)->value.u[0].u; struct hlsl_struct_field *field = &type->e.record.fields[field_idx]; if (!(c = hlsl_new_uint_constant(ctx, field->reg_offset[regset], loc))) return NULL; - list_add_tail(&block->instrs, &c->node.entry); + hlsl_block_add_instr(block, c); - idx_offset = &c->node; + idx_offset = c; break; } @@ -87,7 +87,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str { if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, offset, idx_offset))) return NULL; - list_add_tail(&block->instrs, &idx_offset->entry); + hlsl_block_add_instr(block, idx_offset); } return idx_offset; @@ -101,7 +101,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st struct hlsl_type *type; unsigned int i; - list_init(&block->instrs); + hlsl_block_init(block); assert(deref->var); type = deref->var->data_type; @@ -114,7 +114,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st deref->offset_regset, loc))) return NULL; - list_move_tail(&block->instrs, &idx_block.instrs); + hlsl_block_add_block(block, &idx_block); type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); } @@ -123,15 +123,14 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st } /* TODO: remove when no longer needed, only used for transform_deref_paths_into_offsets() */ -static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, +static bool 
replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { const struct hlsl_type *type; struct hlsl_ir_node *offset; struct hlsl_block block; - if (!deref->var) - return; + assert(deref->var); /* register offsets shouldn't be used before this point is reached. */ assert(!deref->offset.node); @@ -140,48 +139,22 @@ static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der /* Instructions that directly refer to structs or arrays (instead of single-register components) * are removed later by dce. So it is not a problem to just cleanup their derefs. */ - if (type->type == HLSL_CLASS_STRUCT || type->type == HLSL_CLASS_ARRAY) + if (type->class == HLSL_CLASS_STRUCT || type->class == HLSL_CLASS_ARRAY) { hlsl_cleanup_deref(deref); - return; + return true; } deref->offset_regset = hlsl_type_get_regset(type); if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) - return; + return false; list_move_before(&instr->entry, &block.instrs); hlsl_cleanup_deref(deref); hlsl_src_from_node(&deref->offset, offset); -} - -/* TODO: remove when no longer needed. 
*/ -static bool transform_deref_paths_into_offsets(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -{ - switch(instr->type) - { - case HLSL_IR_LOAD: - replace_deref_path_with_offset(ctx, &hlsl_ir_load(instr)->src, instr); - return true; - - case HLSL_IR_STORE: - replace_deref_path_with_offset(ctx, &hlsl_ir_store(instr)->lhs, instr); - return true; - case HLSL_IR_RESOURCE_LOAD: - replace_deref_path_with_offset(ctx, &hlsl_ir_resource_load(instr)->resource, instr); - replace_deref_path_with_offset(ctx, &hlsl_ir_resource_load(instr)->sampler, instr); - return true; - - case HLSL_IR_RESOURCE_STORE: - replace_deref_path_with_offset(ctx, &hlsl_ir_resource_store(instr)->resource, instr); - return true; - - default: - return false; - } - return false; + return true; } /* Split uniforms into two variables representing the constant and temp @@ -191,14 +164,14 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru { struct vkd3d_string_buffer *name; struct hlsl_ir_var *uniform; - struct hlsl_ir_store *store; + struct hlsl_ir_node *store; struct hlsl_ir_load *load; /* Use the synthetic name for the temp, rather than the uniform, so that we * can write the uniform name into the shader reflection data. 
*/ if (!(uniform = hlsl_new_var(ctx, temp->name, temp->data_type, - temp->loc, NULL, temp->storage_modifiers, &temp->reg_reservation))) + &temp->loc, NULL, temp->storage_modifiers, &temp->reg_reservation))) return; list_add_before(&temp->scope_entry, &uniform->scope_entry); list_add_tail(&ctx->extern_vars, &uniform->extern_entry); @@ -212,17 +185,53 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru temp->name = hlsl_strdup(ctx, name->buffer); hlsl_release_string_buffer(ctx, name); - if (!(load = hlsl_new_var_load(ctx, uniform, temp->loc))) + if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) return; list_add_head(instrs, &load->node.entry); if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) return; - list_add_after(&load->node.entry, &store->node.entry); + list_add_after(&load->node.entry, &store->entry); +} + +static void validate_field_semantic(struct hlsl_ctx *ctx, struct hlsl_struct_field *field) +{ + if (!field->semantic.name && hlsl_get_multiarray_element_type(field->type)->class <= HLSL_CLASS_LAST_NUMERIC + && !field->semantic.reported_missing) + { + hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Field '%s' is missing a semantic.", field->name); + field->semantic.reported_missing = true; + } +} + +static enum hlsl_base_type base_type_get_semantic_equivalent(enum hlsl_base_type base) +{ + if (base == HLSL_TYPE_BOOL) + return HLSL_TYPE_UINT; + if (base == HLSL_TYPE_INT) + return HLSL_TYPE_UINT; + if (base == HLSL_TYPE_HALF) + return HLSL_TYPE_FLOAT; + return base; +} + +static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hlsl_type *type1, + const struct hlsl_type *type2) +{ + if (ctx->profile->major_version < 4) + return true; + + if (type1->dimx != type2->dimx) + return false; + + return base_type_get_semantic_equivalent(type1->base_type) + == base_type_get_semantic_equivalent(type2->base_type); } static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx 
*ctx, struct hlsl_ir_var *var, - struct hlsl_type *type, unsigned int modifiers, const struct hlsl_semantic *semantic, bool output) + struct hlsl_type *type, unsigned int modifiers, struct hlsl_semantic *semantic, + uint32_t index, bool output, const struct vkd3d_shader_location *loc) { struct hlsl_semantic new_semantic; struct vkd3d_string_buffer *name; @@ -230,15 +239,50 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir if (!(name = hlsl_get_string_buffer(ctx))) return NULL; - vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? "output" : "input", semantic->name, semantic->index); + vkd3d_string_buffer_printf(name, "<%s-%s%u>", output ? "output" : "input", semantic->name, index); + + LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!ascii_strcasecmp(ext_var->name, name->buffer)) + { + if (output) + { + if (index >= semantic->reported_duplicated_output_next_index) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Output semantic \"%s%u\" is used multiple times.", semantic->name, index); + hlsl_note(ctx, &ext_var->loc, HLSL_LEVEL_ERROR, + "First use of \"%s%u\" is here.", semantic->name, index); + semantic->reported_duplicated_output_next_index = index + 1; + } + } + else + { + if (index >= semantic->reported_duplicated_input_incompatible_next_index + && !types_are_semantic_equivalent(ctx, ext_var->data_type, type)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Input semantic \"%s%u\" is used multiple times with incompatible types.", + semantic->name, index); + hlsl_note(ctx, &ext_var->loc, HLSL_LEVEL_ERROR, + "First declaration of \"%s%u\" is here.", semantic->name, index); + semantic->reported_duplicated_input_incompatible_next_index = index + 1; + } + } + + hlsl_release_string_buffer(ctx, name); + return ext_var; + } + } + if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) { hlsl_release_string_buffer(ctx, name); return NULL; 
} - new_semantic.index = semantic->index; - if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), - type, var->loc, &new_semantic, modifiers, NULL))) + new_semantic.index = index; + if (!(ext_var = hlsl_new_var(ctx, hlsl_strdup(ctx, name->buffer), type, loc, &new_semantic, + modifiers, NULL))) { hlsl_release_string_buffer(ctx, name); hlsl_cleanup_semantic(&new_semantic); @@ -257,80 +301,116 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir } static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, - unsigned int modifiers, const struct hlsl_semantic *semantic) + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { - struct hlsl_type *type = lhs->node.data_type, *vector_type; + struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; + struct vkd3d_shader_location *loc = &lhs->node.loc; struct hlsl_ir_var *var = lhs->src.var; + struct hlsl_ir_node *c; unsigned int i; - vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); + if (type->class > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + if (!(string = hlsl_type_to_string(ctx, type))) + return; + hlsl_fixme(ctx, &var->loc, "Input semantics for type %s.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + if (!semantic->name) + return; + + vector_type_src = hlsl_get_vector_type(ctx, type->base_type, + (ctx->profile->major_version < 4) ? 
4 : hlsl_type_minor_size(type)); + vector_type_dst = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); for (i = 0; i < hlsl_type_major_size(type); ++i) { - struct hlsl_semantic semantic_copy = *semantic; - struct hlsl_ir_store *store; - struct hlsl_ir_constant *c; + struct hlsl_ir_node *store, *cast; struct hlsl_ir_var *input; struct hlsl_ir_load *load; - semantic_copy.index = semantic->index + i; - - if (!(input = add_semantic_var(ctx, var, vector_type, modifiers, &semantic_copy, false))) + if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic, + semantic_index + i, false, loc))) return; - if (!(load = hlsl_new_var_load(ctx, input, var->loc))) + if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) return; list_add_after(&lhs->node.entry, &load->node.entry); - if (type->type == HLSL_CLASS_MATRIX) + if (!(cast = hlsl_new_cast(ctx, &load->node, vector_type_dst, &var->loc))) + return; + list_add_after(&load->node.entry, &cast->entry); + + if (type->class == HLSL_CLASS_MATRIX) { if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_after(&load->node.entry, &c->node.entry); + list_add_after(&cast->entry, &c->entry); - if (!(store = hlsl_new_store_index(ctx, &lhs->src, &c->node, &load->node, 0, &var->loc))) + if (!(store = hlsl_new_store_index(ctx, &lhs->src, c, cast, 0, &var->loc))) return; - list_add_after(&c->node.entry, &store->node.entry); + list_add_after(&c->entry, &store->entry); } else { assert(i == 0); - if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, &load->node, 0, &var->loc))) + if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, cast, 0, &var->loc))) return; - list_add_after(&load->node.entry, &store->node.entry); + list_add_after(&cast->entry, &store->entry); } } } -static void prepend_input_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs) +static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load 
*lhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { + struct vkd3d_shader_location *loc = &lhs->node.loc; struct hlsl_type *type = lhs->node.data_type; struct hlsl_ir_var *var = lhs->src.var; - size_t i; + struct hlsl_ir_node *c; + unsigned int i; - for (i = 0; i < type->e.record.field_count; ++i) + if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT) { - const struct hlsl_struct_field *field = &type->e.record.fields[i]; - struct hlsl_ir_load *field_load; - struct hlsl_ir_constant *c; + struct hlsl_ir_load *element_load; + struct hlsl_struct_field *field; + uint32_t elem_semantic_index; - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; - list_add_after(&lhs->node.entry, &c->node.entry); + for (i = 0; i < hlsl_type_element_count(type); ++i) + { + if (type->class == HLSL_CLASS_ARRAY) + { + elem_semantic_index = semantic_index + + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; + } + else + { + field = &type->e.record.fields[i]; + if (hlsl_type_is_resource(field->type)) + continue; + validate_field_semantic(ctx, field); + semantic = &field->semantic; + elem_semantic_index = semantic->index; + loc = &field->loc; + } - /* This redundant load is expected to be deleted later by DCE. */ - if (!(field_load = hlsl_new_load_index(ctx, &lhs->src, &c->node, &var->loc))) - return; - list_add_after(&c->node.entry, &field_load->node.entry); + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; + list_add_after(&lhs->node.entry, &c->entry); - if (field->type->type == HLSL_CLASS_STRUCT) - prepend_input_struct_copy(ctx, instrs, field_load); - else if (field->semantic.name) - prepend_input_copy(ctx, instrs, field_load, field->storage_modifiers, &field->semantic); - else - hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Field '%s' is missing a semantic.", field->name); + /* This redundant load is expected to be deleted later by DCE. 
*/ + if (!(element_load = hlsl_new_load_index(ctx, &lhs->src, c, loc))) + return; + list_add_after(&c->entry, &element_load->node.entry); + + prepend_input_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { + prepend_input_copy(ctx, instrs, lhs, modifiers, semantic, semantic_index); } } @@ -341,45 +421,51 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, st struct hlsl_ir_load *load; /* This redundant load is expected to be deleted later by DCE. */ - if (!(load = hlsl_new_var_load(ctx, var, var->loc))) + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; list_add_head(instrs, &load->node.entry); - if (var->data_type->type == HLSL_CLASS_STRUCT) - prepend_input_struct_copy(ctx, instrs, load); - else if (var->semantic.name) - prepend_input_copy(ctx, instrs, load, var->storage_modifiers, &var->semantic); + prepend_input_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); } static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, - unsigned int modifiers, const struct hlsl_semantic *semantic) + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct hlsl_type *type = rhs->node.data_type, *vector_type; + struct vkd3d_shader_location *loc = &rhs->node.loc; struct hlsl_ir_var *var = rhs->src.var; + struct hlsl_ir_node *c; unsigned int i; + if (type->class > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; + if (!(string = hlsl_type_to_string(ctx, type))) + return; + hlsl_fixme(ctx, &var->loc, "Output semantics for type %s.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } + if (!semantic->name) + return; + vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); for (i = 0; i < hlsl_type_major_size(type); ++i) { - struct hlsl_semantic semantic_copy = *semantic; - struct hlsl_ir_store *store; - struct 
hlsl_ir_constant *c; + struct hlsl_ir_node *store; struct hlsl_ir_var *output; struct hlsl_ir_load *load; - semantic_copy.index = semantic->index + i; - - if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, &semantic_copy, true))) + if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc))) return; - if (type->type == HLSL_CLASS_MATRIX) + if (type->class == HLSL_CLASS_MATRIX) { if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - list_add_tail(instrs, &c->node.entry); + list_add_tail(instrs, &c->entry); - if (!(load = hlsl_new_load_index(ctx, &rhs->src, &c->node, &var->loc))) + if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) return; list_add_tail(instrs, &load->node.entry); } @@ -394,38 +480,57 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) return; - list_add_tail(instrs, &store->node.entry); + list_add_tail(instrs, &store->entry); } } -static void append_output_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs) +static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { + struct vkd3d_shader_location *loc = &rhs->node.loc; struct hlsl_type *type = rhs->node.data_type; struct hlsl_ir_var *var = rhs->src.var; - size_t i; + struct hlsl_ir_node *c; + unsigned int i; - for (i = 0; i < type->e.record.field_count; ++i) + if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT) { - const struct hlsl_struct_field *field = &type->e.record.fields[i]; - struct hlsl_ir_load *field_load; - struct hlsl_ir_constant *c; + struct hlsl_ir_load *element_load; + struct hlsl_struct_field *field; + uint32_t elem_semantic_index; - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; - list_add_tail(instrs, 
&c->node.entry); + for (i = 0; i < hlsl_type_element_count(type); ++i) + { + if (type->class == HLSL_CLASS_ARRAY) + { + elem_semantic_index = semantic_index + + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; + } + else + { + field = &type->e.record.fields[i]; + if (hlsl_type_is_resource(field->type)) + continue; + validate_field_semantic(ctx, field); + semantic = &field->semantic; + elem_semantic_index = semantic->index; + loc = &field->loc; + } - /* This redundant load is expected to be deleted later by DCE. */ - if (!(field_load = hlsl_new_load_index(ctx, &rhs->src, &c->node, &var->loc))) - return; - list_add_tail(instrs, &field_load->node.entry); + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; + list_add_tail(instrs, &c->entry); - if (field->type->type == HLSL_CLASS_STRUCT) - append_output_struct_copy(ctx, instrs, field_load); - else if (field->semantic.name) - append_output_copy(ctx, instrs, field_load, field->storage_modifiers, &field->semantic); - else - hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Field '%s' is missing a semantic.", field->name); + if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) + return; + list_add_tail(instrs, &element_load->node.entry); + + append_output_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { + append_output_copy(ctx, instrs, rhs, modifiers, semantic, semantic_index); } } @@ -437,17 +542,14 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, st struct hlsl_ir_load *load; /* This redundant load is expected to be deleted later by DCE. 
*/ - if (!(load = hlsl_new_var_load(ctx, var, var->loc))) + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; list_add_tail(instrs, &load->node.entry); - if (var->data_type->type == HLSL_CLASS_STRUCT) - append_output_struct_copy(ctx, instrs, load); - else if (var->semantic.name) - append_output_copy(ctx, instrs, load, var->storage_modifiers, &var->semantic); + append_output_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); } -static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), +bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), struct hlsl_block *block, void *context) { struct hlsl_ir_node *instr, *next; @@ -459,11 +561,11 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - progress |= transform_ir(ctx, func, &iff->then_instrs, context); - progress |= transform_ir(ctx, func, &iff->else_instrs, context); + progress |= hlsl_transform_ir(ctx, func, &iff->then_block, context); + progress |= hlsl_transform_ir(ctx, func, &iff->else_block, context); } else if (instr->type == HLSL_IR_LOOP) - progress |= transform_ir(ctx, func, &hlsl_ir_loop(instr)->body, context); + progress |= hlsl_transform_ir(ctx, func, &hlsl_ir_loop(instr)->body, context); progress |= func(ctx, instr, context); } @@ -471,6 +573,44 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx return progress; } +static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + bool res; + bool (*func)(struct hlsl_ctx *ctx, struct hlsl_deref *, struct hlsl_ir_node *) = context; + + switch(instr->type) + { + case HLSL_IR_LOAD: + res = func(ctx, &hlsl_ir_load(instr)->src, instr); + return res; + + case HLSL_IR_STORE: + res = func(ctx, &hlsl_ir_store(instr)->lhs, instr); + return res; + + case 
HLSL_IR_RESOURCE_LOAD: + res = func(ctx, &hlsl_ir_resource_load(instr)->resource, instr); + if (hlsl_ir_resource_load(instr)->sampler.var) + res |= func(ctx, &hlsl_ir_resource_load(instr)->sampler, instr); + return res; + + case HLSL_IR_RESOURCE_STORE: + res = func(ctx, &hlsl_ir_resource_store(instr)->resource, instr); + return res; + + default: + return false; + } + return false; +} + +static bool transform_derefs(struct hlsl_ctx *ctx, + bool (*func)(struct hlsl_ctx *ctx, struct hlsl_deref *, struct hlsl_ir_node *), + struct hlsl_block *block) +{ + return hlsl_transform_ir(ctx, transform_instr_derefs, block, func); +} + struct recursive_call_ctx { const struct hlsl_ir_function_decl **backtrace; @@ -506,7 +646,7 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst return false; call_ctx->backtrace[call_ctx->count++] = decl; - transform_ir(ctx, find_recursive_calls, &decl->body, call_ctx); + hlsl_transform_ir(ctx, find_recursive_calls, &decl->body, call_ctx); --call_ctx->count; @@ -516,21 +656,23 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst static void insert_early_return_break(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_node *cf_instr) { - struct hlsl_ir_jump *jump; + struct hlsl_ir_node *iff, *jump; + struct hlsl_block then_block; struct hlsl_ir_load *load; - struct hlsl_ir_if *iff; - if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) + hlsl_block_init(&then_block); + + if (!(load = hlsl_new_var_load(ctx, func->early_return_var, &cf_instr->loc))) return; list_add_after(&cf_instr->entry, &load->node.entry); - if (!(iff = hlsl_new_if(ctx, &load->node, cf_instr->loc))) + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) return; - list_add_after(&load->node.entry, &iff->node.entry); + hlsl_block_add_instr(&then_block, jump); - if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, cf_instr->loc))) + if (!(iff = hlsl_new_if(ctx, 
&load->node, &then_block, NULL, &cf_instr->loc))) return; - list_add_tail(&iff->then_instrs.instrs, &jump->node.entry); + list_add_after(&load->node.entry, &iff->entry); } /* Remove HLSL_IR_JUMP_RETURN calls by altering subsequent control flow. */ @@ -566,7 +708,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun * the CF instruction, shove it into an if block, and then lower that if * block. * - * (We could return a "did we make progress" boolean like transform_ir() + * (We could return a "did we make progress" boolean like hlsl_transform_ir() * and run this pass multiple times, but we already know the only block * that still needs to be addressed, so there's not much point.) * @@ -591,8 +733,8 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - has_early_return |= lower_return(ctx, func, &iff->then_instrs, in_loop); - has_early_return |= lower_return(ctx, func, &iff->else_instrs, in_loop); + has_early_return |= lower_return(ctx, func, &iff->then_block, in_loop); + has_early_return |= lower_return(ctx, func, &iff->else_block, in_loop); if (has_early_return) { @@ -628,18 +770,17 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun else if (instr->type == HLSL_IR_JUMP) { struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); - struct hlsl_ir_constant *constant; - struct hlsl_ir_store *store; + struct hlsl_ir_node *constant, *store; if (jump->type == HLSL_IR_JUMP_RETURN) { if (!(constant = hlsl_new_bool_constant(ctx, true, &jump->node.loc))) return false; - list_add_before(&jump->node.entry, &constant->node.entry); + list_add_before(&jump->node.entry, &constant->entry); - if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, &constant->node))) + if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, constant))) return false; - list_add_after(&constant->node.entry, &store->node.entry); + 
list_add_after(&constant->entry, &store->entry); has_early_return = true; if (in_loop) @@ -675,9 +816,9 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun else if (cf_instr) { struct list *tail = list_tail(&block->instrs); + struct hlsl_ir_node *not, *iff; + struct hlsl_block then_block; struct hlsl_ir_load *load; - struct hlsl_ir_node *not; - struct hlsl_ir_if *iff; /* If we're in a loop, we should have used "break" instead. */ assert(!in_loop); @@ -685,21 +826,21 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun if (tail == &cf_instr->entry) return has_early_return; - if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) - return false; - list_add_tail(&block->instrs, &load->node.entry); + hlsl_block_init(&then_block); + list_move_slice_tail(&then_block.instrs, list_next(&block->instrs, &cf_instr->entry), tail); + lower_return(ctx, func, &then_block, in_loop); - if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, cf_instr->loc))) + if (!(load = hlsl_new_var_load(ctx, func->early_return_var, &cf_instr->loc))) return false; - list_add_tail(&block->instrs, &not->entry); + hlsl_block_add_instr(block, &load->node); - if (!(iff = hlsl_new_if(ctx, not, cf_instr->loc))) + if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, &cf_instr->loc))) return false; - list_add_tail(&block->instrs, &iff->node.entry); - - list_move_slice_tail(&iff->then_instrs.instrs, list_next(&block->instrs, &cf_instr->entry), tail); + hlsl_block_add_instr(block, not); - lower_return(ctx, func, &iff->then_instrs, in_loop); + if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &cf_instr->loc))) + return false; + list_add_tail(&block->instrs, &iff->entry); } return has_early_return; @@ -721,7 +862,6 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * hlsl_error(ctx, &call->node.loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Function \"%s\" is not 
defined.", decl->func->name); - list_init(&block.instrs); if (!hlsl_clone_block(ctx, &block, &decl->body)) return false; list_move_before(&call->node.entry, &block.instrs); @@ -731,6 +871,142 @@ static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void * return true; } +static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct hlsl_ir_node *index, + const struct vkd3d_shader_location *loc) +{ + unsigned int dim_count = index->data_type->dimx; + struct hlsl_ir_node *store, *zero; + struct hlsl_ir_load *coords_load; + struct hlsl_deref coords_deref; + struct hlsl_ir_var *coords; + + assert(dim_count < 4); + + if (!(coords = hlsl_new_synthetic_var(ctx, "coords", + hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) + return NULL; + + hlsl_init_simple_deref_from_var(&coords_deref, coords); + if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, index, (1u << dim_count) - 1, loc))) + return NULL; + list_add_after(&index->entry, &store->entry); + + if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) + return NULL; + list_add_after(&store->entry, &zero->entry); + + if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, zero, 1u << dim_count, loc))) + return NULL; + list_add_after(&zero->entry, &store->entry); + + if (!(coords_load = hlsl_new_var_load(ctx, coords, loc))) + return NULL; + list_add_after(&store->entry, &coords_load->node.entry); + + return &coords_load->node; +} + +/* hlsl_ir_index nodes are a parse-time construct used to represent array indexing and struct + * record access before knowing if they will be used in the lhs of an assignment --in which case + * they are lowered into a deref-- or as the load of an element within a larger value. + * For the latter case, this pass takes care of lowering hlsl_ir_indexes into individual + * hlsl_ir_loads, or individual hlsl_ir_resource_loads, in case the indexing is a + * resource access. 
*/ +static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *val, *store; + struct hlsl_deref var_deref; + struct hlsl_ir_index *index; + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + + if (instr->type != HLSL_IR_INDEX) + return false; + index = hlsl_ir_index(instr); + val = index->val.node; + + if (hlsl_index_is_resource_access(index)) + { + unsigned int dim_count = hlsl_sampler_dim_count(val->data_type->sampler_dim); + struct hlsl_ir_node *coords = index->idx.node; + struct hlsl_resource_load_params params = {0}; + struct hlsl_ir_node *load; + + assert(coords->data_type->class == HLSL_CLASS_VECTOR); + assert(coords->data_type->base_type == HLSL_TYPE_UINT); + assert(coords->data_type->dimx == dim_count); + + if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) + return false; + + params.type = HLSL_RESOURCE_LOAD; + params.resource = val; + params.coords = coords; + params.format = val->data_type->e.resource_format; + + if (!(load = hlsl_new_resource_load(ctx, &params, &instr->loc))) + return false; + list_add_before(&instr->entry, &load->entry); + hlsl_replace_node(instr, load); + return true; + } + + if (!(var = hlsl_new_synthetic_var(ctx, "index-val", val->data_type, &instr->loc))) + return false; + hlsl_init_simple_deref_from_var(&var_deref, var); + + if (!(store = hlsl_new_simple_store(ctx, var, val))) + return false; + list_add_before(&instr->entry, &store->entry); + + if (hlsl_index_is_noncontiguous(index)) + { + struct hlsl_ir_node *mat = index->val.node; + struct hlsl_deref row_deref; + unsigned int i; + + assert(!hlsl_type_is_row_major(mat->data_type)); + + if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc))) + return false; + hlsl_init_simple_deref_from_var(&row_deref, var); + + for (i = 0; i < mat->data_type->dimx; ++i) + { + struct hlsl_ir_node *c; + + if (!(c = hlsl_new_uint_constant(ctx, i, &instr->loc))) + return false; + 
list_add_before(&instr->entry, &c->entry); + + if (!(load = hlsl_new_load_index(ctx, &var_deref, c, &instr->loc))) + return false; + list_add_before(&instr->entry, &load->node.entry); + + if (!(load = hlsl_new_load_index(ctx, &load->src, index->idx.node, &instr->loc))) + return false; + list_add_before(&instr->entry, &load->node.entry); + + if (!(store = hlsl_new_store_index(ctx, &row_deref, c, &load->node, 0, &instr->loc))) + return false; + list_add_before(&instr->entry, &store->entry); + } + + if (!(load = hlsl_new_var_load(ctx, var, &instr->loc))) + return false; + list_add_before(&instr->entry, &load->node.entry); + hlsl_replace_node(instr, &load->node); + } + else + { + if (!(load = hlsl_new_load_index(ctx, &var_deref, index->idx.node, &instr->loc))) + return false; + list_add_before(&instr->entry, &load->node.entry); + hlsl_replace_node(instr, &load->node); + } + return true; +} + /* Lower casts from vec1 to vecN to swizzles. */ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -746,26 +1022,24 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type; - if (src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR && src_type->dimx == 1) + if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->dimx == 1) { - struct hlsl_ir_node *replacement; - struct hlsl_ir_swizzle *swizzle; - struct hlsl_ir_expr *new_cast; + struct hlsl_ir_node *replacement, *new_cast, *swizzle; dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->base_type); /* We need to preserve the cast since it might be doing more than just * turning the scalar into a vector. 
*/ if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_scalar_type, &cast->node.loc))) return false; - list_add_after(&cast->node.entry, &new_cast->node.entry); - replacement = &new_cast->node; + list_add_after(&cast->node.entry, &new_cast->entry); + replacement = new_cast; if (dst_type->dimx != 1) { if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), dst_type->dimx, replacement, &cast->node.loc))) return false; - list_add_after(&new_cast->node.entry, &swizzle->node.entry); - replacement = &swizzle->node; + list_add_after(&new_cast->entry, &swizzle->entry); + replacement = swizzle; } hlsl_replace_node(&cast->node, replacement); @@ -949,9 +1223,9 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ path_node = deref->path[depth].node; subtype = hlsl_get_element_type_from_path_index(ctx, type, path_node); - if (type->type == HLSL_CLASS_STRUCT) + if (type->class == HLSL_CLASS_STRUCT) { - unsigned int idx = hlsl_ir_constant(path_node)->value[0].u; + unsigned int idx = hlsl_ir_constant(path_node)->value.u[0].u; for (i = 0; i < idx; ++i) comp_start += hlsl_type_component_count(type->e.record.fields[i].type); @@ -966,7 +1240,7 @@ static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ if (path_node->type == HLSL_IR_CONSTANT) { copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype, - depth + 1, hlsl_ir_constant(path_node)->value[0].u * subtype_comp_count, writemask); + depth + 1, hlsl_ir_constant(path_node)->value.u[0].u * subtype_comp_count, writemask); } else { @@ -1041,14 +1315,14 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count)); - if (instr->data_type->type != HLSL_CLASS_OBJECT) + if (instr->data_type->class != HLSL_CLASS_OBJECT) { - struct hlsl_ir_swizzle *swizzle_node; + struct 
hlsl_ir_node *swizzle_node; if (!(swizzle_node = hlsl_new_swizzle(ctx, ret_swizzle, instr_component_count, new_instr, &instr->loc))) return false; - list_add_before(&instr->entry, &swizzle_node->node.entry); - new_instr = &swizzle_node->node; + list_add_before(&instr->entry, &swizzle_node->entry); + new_instr = swizzle_node; } hlsl_replace_node(instr, new_instr); @@ -1061,9 +1335,9 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, { const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type); const struct hlsl_ir_var *var = deref->var; - union hlsl_constant_value values[4] = {0}; - struct hlsl_ir_constant *cons; + struct hlsl_constant_value values = {0}; unsigned int start, count, i; + struct hlsl_ir_node *cons; if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count)) return false; @@ -1076,21 +1350,17 @@ static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx, || value->node->type != HLSL_IR_CONSTANT) return false; - values[i] = hlsl_ir_constant(value->node)->value[value->component]; + values.u[i] = hlsl_ir_constant(value->node)->value.u[value->component]; } - if (!(cons = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) + if (!(cons = hlsl_new_constant(ctx, instr->data_type, &values, &instr->loc))) return false; - cons->value[0] = values[0]; - cons->value[1] = values[1]; - cons->value[2] = values[2]; - cons->value[3] = values[3]; - list_add_before(&instr->entry, &cons->node.entry); + list_add_before(&instr->entry, &cons->entry); TRACE("Load from %s[%u-%u]%s turned into a constant %p.\n", var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), cons); - hlsl_replace_node(instr, &cons->node); + hlsl_replace_node(instr, cons); return true; } @@ -1099,7 +1369,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, { struct hlsl_type *type = load->node.data_type; - switch (type->type) + switch (type->class) { case 
HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: @@ -1220,7 +1490,7 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s { unsigned int writemask = store->writemask; - if (store->rhs.node->data_type->type == HLSL_CLASS_OBJECT) + if (store->rhs.node->data_type->class == HLSL_CLASS_OBJECT) writemask = VKD3DSP_WRITEMASK_0; copy_propagation_set_value(var_def, start, writemask, store->rhs.node); } @@ -1270,8 +1540,8 @@ static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); - copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->then_block); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_block); break; } @@ -1301,19 +1571,19 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if bool progress = false; copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &iff->then_instrs, &inner_state); + progress |= copy_propagation_transform_block(ctx, &iff->then_block, &inner_state); copy_propagation_state_destroy(&inner_state); copy_propagation_state_init(ctx, &inner_state, state); - progress |= copy_propagation_transform_block(ctx, &iff->else_instrs, &inner_state); + progress |= copy_propagation_transform_block(ctx, &iff->else_block, &inner_state); copy_propagation_state_destroy(&inner_state); /* Ideally we'd invalidate the outer state looking at what was * touched in the two inner states, but this doesn't work for * loops (because we need to know what is invalidated in advance), * so we need copy_propagation_invalidate_from_block() anyway. 
*/ - copy_propagation_invalidate_from_block(ctx, state, &iff->then_instrs); - copy_propagation_invalidate_from_block(ctx, state, &iff->else_instrs); + copy_propagation_invalidate_from_block(ctx, state, &iff->then_block); + copy_propagation_invalidate_from_block(ctx, state, &iff->else_block); return progress; } @@ -1379,7 +1649,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b return progress; } -static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) +bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) { struct copy_propagation_state state; bool progress; @@ -1471,7 +1741,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ static bool is_vec1(const struct hlsl_type *type) { - return (type->type == HLSL_CLASS_SCALAR) || (type->type == HLSL_CLASS_VECTOR && type->dimx == 1); + return (type->class == HLSL_CLASS_SCALAR) || (type->class == HLSL_CLASS_VECTOR && type->dimx == 1); } static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -1505,21 +1775,20 @@ static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst static bool split_copy(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, const struct hlsl_ir_load *load, const unsigned int idx, struct hlsl_type *type) { - struct hlsl_ir_store *split_store; + struct hlsl_ir_node *split_store, *c; struct hlsl_ir_load *split_load; - struct hlsl_ir_constant *c; if (!(c = hlsl_new_uint_constant(ctx, idx, &store->node.loc))) return false; - list_add_before(&store->node.entry, &c->node.entry); + list_add_before(&store->node.entry, &c->entry); - if (!(split_load = hlsl_new_load_index(ctx, &load->src, &c->node, &store->node.loc))) + if (!(split_load = hlsl_new_load_index(ctx, &load->src, c, &store->node.loc))) return false; list_add_before(&store->node.entry, &split_load->node.entry); - if (!(split_store = hlsl_new_store_index(ctx, 
&store->lhs, &c->node, &split_load->node, 0, &store->node.loc))) + if (!(split_store = hlsl_new_store_index(ctx, &store->lhs, c, &split_load->node, 0, &store->node.loc))) return false; - list_add_before(&store->node.entry, &split_store->node.entry); + list_add_before(&store->node.entry, &split_store->entry); return true; } @@ -1538,7 +1807,7 @@ static bool split_array_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, store = hlsl_ir_store(instr); rhs = store->rhs.node; type = rhs->data_type; - if (type->type != HLSL_CLASS_ARRAY) + if (type->class != HLSL_CLASS_ARRAY) return false; element_type = type->e.array.type; @@ -1575,7 +1844,7 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr store = hlsl_ir_store(instr); rhs = store->rhs.node; type = rhs->data_type; - if (type->type != HLSL_CLASS_STRUCT) + if (type->class != HLSL_CLASS_STRUCT) return false; if (rhs->type != HLSL_IR_LOAD) @@ -1614,7 +1883,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr store = hlsl_ir_store(instr); rhs = store->rhs.node; type = rhs->data_type; - if (type->type != HLSL_CLASS_MATRIX) + if (type->class != HLSL_CLASS_MATRIX) return false; element_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type)); @@ -1649,22 +1918,21 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type; - if (src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) + if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && dst_type->dimx < src_type->dimx) { - struct hlsl_ir_swizzle *swizzle; - struct hlsl_ir_expr *new_cast; + struct hlsl_ir_node *new_cast, *swizzle; dst_vector_type = hlsl_get_vector_type(ctx, dst_type->base_type, src_type->dimx); /* We need to preserve the cast since it might be doing more than just * narrowing the 
vector. */ if (!(new_cast = hlsl_new_cast(ctx, cast->operands[0].node, dst_vector_type, &cast->node.loc))) return false; - list_add_after(&cast->node.entry, &new_cast->node.entry); - if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, &new_cast->node, &cast->node.loc))) + list_add_after(&cast->node.entry, &new_cast->entry); + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dst_type->dimx, new_cast, &cast->node.loc))) return false; - list_add_after(&new_cast->node.entry, &swizzle->node.entry); + list_add_after(&new_cast->entry, &swizzle->entry); - hlsl_replace_node(&cast->node, &swizzle->node); + hlsl_replace_node(&cast->node, swizzle); return true; } @@ -1684,8 +1952,7 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr if (next_instr->type == HLSL_IR_SWIZZLE) { - struct hlsl_ir_swizzle *new_swizzle; - struct hlsl_ir_node *new_instr; + struct hlsl_ir_node *new_swizzle; unsigned int combined_swizzle; combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle, @@ -1695,9 +1962,8 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) return false; - new_instr = &new_swizzle->node; - list_add_before(&instr->entry, &new_instr->entry); - hlsl_replace_node(instr, new_instr); + list_add_before(&instr->entry, &new_swizzle->entry); + hlsl_replace_node(instr, new_swizzle); return true; } @@ -1725,6 +1991,81 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return true; } +static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *idx; + struct hlsl_deref *deref; + struct hlsl_type *type; + unsigned int i; + + if (instr->type != HLSL_IR_LOAD) + return false; + + deref = &hlsl_ir_load(instr)->src; + assert(deref->var); + + if 
(deref->path_len == 0) + return false; + + type = deref->var->data_type; + for (i = 0; i < deref->path_len - 1; ++i) + type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); + + idx = deref->path[deref->path_len - 1].node; + + if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) + { + struct hlsl_ir_node *eq, *swizzle, *dot, *c, *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_constant_value value; + struct hlsl_ir_load *vector_load; + enum hlsl_ir_expr_op op; + + if (!(vector_load = hlsl_new_load_parent(ctx, deref, &instr->loc))) + return false; + list_add_before(&instr->entry, &vector_load->node.entry); + + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), type->dimx, idx, &instr->loc))) + return false; + list_add_before(&instr->entry, &swizzle->entry); + + value.u[0].u = 0; + value.u[1].u = 1; + value.u[2].u = 2; + value.u[3].u = 3; + if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, type->dimx), &value, &instr->loc))) + return false; + list_add_before(&instr->entry, &c->entry); + + operands[0] = swizzle; + operands[1] = c; + if (!(eq = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, + hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, type->dimx), &instr->loc))) + return false; + list_add_before(&instr->entry, &eq->entry); + + if (!(eq = hlsl_new_cast(ctx, eq, type, &instr->loc))) + return false; + list_add_before(&instr->entry, &eq->entry); + + op = HLSL_OP2_DOT; + if (type->dimx == 1) + op = type->base_type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; + + /* Note: We may be creating a DOT for bool vectors here, which we need to lower to + * LOGIC_OR + LOGIC_AND. */ + operands[0] = &vector_load->node; + operands[1] = eq; + if (!(dot = hlsl_new_expr(ctx, op, operands, instr->data_type, &instr->loc))) + return false; + list_add_before(&instr->entry, &dot->entry); + hlsl_replace_node(instr, dot); + + return true; + } + + return false; +} + /* Lower DIV to RCP + MUL. 
*/ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -1737,7 +2078,7 @@ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi if (expr->op != HLSL_OP2_DIV) return false; - if (!(rcp = hlsl_new_unary_expr(ctx, HLSL_OP1_RCP, expr->operands[1].node, instr->loc))) + if (!(rcp = hlsl_new_unary_expr(ctx, HLSL_OP1_RCP, expr->operands[1].node, &instr->loc))) return false; list_add_before(&expr->node.entry, &rcp->entry); expr->op = HLSL_OP2_MUL; @@ -1758,7 +2099,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c if (expr->op != HLSL_OP1_SQRT) return false; - if (!(rsq = hlsl_new_unary_expr(ctx, HLSL_OP1_RSQ, expr->operands[0].node, instr->loc))) + if (!(rsq = hlsl_new_unary_expr(ctx, HLSL_OP1_RSQ, expr->operands[0].node, &instr->loc))) return false; list_add_before(&expr->node.entry, &rsq->entry); expr->op = HLSL_OP1_RCP; @@ -1770,9 +2111,7 @@ static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *c /* Lower DP2 to MUL + ADD */ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_node *arg1, *arg2, *mul, *replacement; - struct hlsl_ir_swizzle *add_x, *add_y; - struct hlsl_ir_constant *zero; + struct hlsl_ir_node *arg1, *arg2, *mul, *replacement, *zero, *add_x, *add_y; struct hlsl_ir_expr *expr; if (instr->type != HLSL_IR_EXPR) @@ -1791,11 +2130,11 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co if (!(zero = hlsl_new_float_constant(ctx, 0.0f, &expr->node.loc))) return false; - list_add_before(&instr->entry, &zero->node.entry); + list_add_before(&instr->entry, &zero->entry); operands[0] = arg1; operands[1] = arg2; - operands[2] = &zero->node; + operands[2] = zero; if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_DP2ADD, operands, instr->data_type, &expr->node.loc))) return false; @@ -1808,13 +2147,13 @@ static bool lower_dot(struct hlsl_ctx *ctx, struct 
hlsl_ir_node *instr, void *co if (!(add_x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), instr->data_type->dimx, mul, &expr->node.loc))) return false; - list_add_before(&instr->entry, &add_x->node.entry); + list_add_before(&instr->entry, &add_x->entry); if (!(add_y = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Y, Y, Y), instr->data_type->dimx, mul, &expr->node.loc))) return false; - list_add_before(&instr->entry, &add_y->node.entry); + list_add_before(&instr->entry, &add_y->entry); - if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, &add_x->node, &add_y->node))) + if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, add_x, add_y))) return false; } list_add_before(&instr->entry, &replacement->entry); @@ -1836,7 +2175,7 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co if (expr->op != HLSL_OP1_ABS) return false; - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, instr->loc))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) return false; list_add_before(&instr->entry, &neg->entry); @@ -1848,77 +2187,124 @@ static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *co return true; } -static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +/* Lower ROUND using FRC, ROUND(x) -> ((x + 0.5) - FRC(x + 0.5)). 
*/ +static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_type *type = instr->data_type, *arg_type; - struct hlsl_ir_constant *zero; + struct hlsl_ir_node *arg, *neg, *sum, *frc, *half, *replacement; + struct hlsl_type *type = instr->data_type; + struct hlsl_constant_value half_value; + unsigned int i, component_count; struct hlsl_ir_expr *expr; if (instr->type != HLSL_IR_EXPR) return false; + expr = hlsl_ir_expr(instr); - if (expr->op != HLSL_OP1_CAST) - return false; - arg_type = expr->operands[0].node->data_type; - if (type->type > HLSL_CLASS_VECTOR || arg_type->type > HLSL_CLASS_VECTOR) + arg = expr->operands[0].node; + if (expr->op != HLSL_OP1_ROUND) return false; - if (type->base_type != HLSL_TYPE_BOOL) + + component_count = hlsl_type_component_count(type); + for (i = 0; i < component_count; ++i) + half_value.u[i].f = 0.5f; + if (!(half = hlsl_new_constant(ctx, type, &half_value, &expr->node.loc))) return false; - /* Narrowing casts should have already been lowered. 
*/ - assert(type->dimx == arg_type->dimx); + list_add_before(&instr->entry, &half->entry); - zero = hlsl_new_constant(ctx, arg_type, &instr->loc); - if (!zero) + if (!(sum = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, half))) return false; - list_add_before(&instr->entry, &zero->node.entry); + list_add_before(&instr->entry, &sum->entry); - expr->op = HLSL_OP2_NEQUAL; - hlsl_src_from_node(&expr->operands[1], &zero->node); + if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, sum, &instr->loc))) + return false; + list_add_before(&instr->entry, &frc->entry); + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, frc, &instr->loc))) + return false; + list_add_before(&instr->entry, &neg->entry); + + if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, sum, neg))) + return false; + list_add_before(&instr->entry, &replacement->entry); + + hlsl_replace_node(instr, replacement); return true; } -struct hlsl_ir_load *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, +static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_type *type = instr->data_type, *arg_type; + static const struct hlsl_constant_value zero_value; + struct hlsl_ir_node *zero; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP1_CAST) + return false; + arg_type = expr->operands[0].node->data_type; + if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR) + return false; + if (type->base_type != HLSL_TYPE_BOOL) + return false; + + /* Narrowing casts should have already been lowered. 
*/ + assert(type->dimx == arg_type->dimx); + + zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); + if (!zero) + return false; + list_add_before(&instr->entry, &zero->entry); + + expr->op = HLSL_OP2_NEQUAL; + hlsl_src_from_node(&expr->operands[1], zero); + + return true; +} + +struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false) { - struct hlsl_ir_store *store; + struct hlsl_block then_block, else_block; + struct hlsl_ir_node *iff, *store; struct hlsl_ir_load *load; struct hlsl_ir_var *var; - struct hlsl_ir_if *iff; assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); if (!(var = hlsl_new_synthetic_var(ctx, "conditional", if_true->data_type, &condition->loc))) return NULL; - if (!(iff = hlsl_new_if(ctx, condition, condition->loc))) - return NULL; - list_add_tail(instrs, &iff->node.entry); + hlsl_block_init(&then_block); + hlsl_block_init(&else_block); if (!(store = hlsl_new_simple_store(ctx, var, if_true))) return NULL; - list_add_tail(&iff->then_instrs.instrs, &store->node.entry); + hlsl_block_add_instr(&then_block, store); if (!(store = hlsl_new_simple_store(ctx, var, if_false))) return NULL; - list_add_tail(&iff->else_instrs.instrs, &store->node.entry); + hlsl_block_add_instr(&else_block, store); - if (!(load = hlsl_new_var_load(ctx, var, condition->loc))) + if (!(iff = hlsl_new_if(ctx, condition, &then_block, &else_block, &condition->loc))) + return NULL; + list_add_tail(instrs, &iff->entry); + + if (!(load = hlsl_new_var_load(ctx, var, &condition->loc))) return NULL; list_add_tail(instrs, &load->node.entry); - return load; + return &load->node; } static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg; + struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, 
*cast3, *cond, *high_bit; struct hlsl_type *type = instr->data_type, *utype; - struct hlsl_ir_expr *cast1, *cast2, *cast3; - struct hlsl_ir_constant *high_bit; + struct hlsl_constant_value high_bit_value; struct hlsl_ir_expr *expr; - struct hlsl_ir_load *cond; unsigned int i; if (instr->type != HLSL_IR_EXPR) @@ -1928,69 +2314,67 @@ static bool lower_int_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_DIV) return false; - if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; if (type->base_type != HLSL_TYPE_INT) return false; - utype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_UINT, type->dimx, type->dimy); + utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); if (!(xor = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_XOR, arg1, arg2))) return false; list_add_before(&instr->entry, &xor->entry); - if (!(high_bit = hlsl_new_constant(ctx, type, &instr->loc))) - return false; for (i = 0; i < type->dimx; ++i) - high_bit->value[i].u = 0x80000000; - list_add_before(&instr->entry, &high_bit->node.entry); + high_bit_value.u[i].u = 0x80000000; + if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) + return false; + list_add_before(&instr->entry, &high_bit->entry); - if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, &high_bit->node))) + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, xor, high_bit))) return false; list_add_before(&instr->entry, &and->entry); - if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, instr->loc))) + if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) return false; list_add_before(&instr->entry, &abs1->entry); if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast1->node.entry); + list_add_before(&instr->entry, 
&cast1->entry); - if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, instr->loc))) + if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) return false; list_add_before(&instr->entry, &abs2->entry); if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast2->node.entry); + list_add_before(&instr->entry, &cast2->entry); - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &cast1->node, &cast2->node))) + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, cast1, cast2))) return false; list_add_before(&instr->entry, &div->entry); if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) return false; - list_add_before(&instr->entry, &cast3->node.entry); + list_add_before(&instr->entry, &cast3->entry); - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, &cast3->node, instr->loc))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) return false; list_add_before(&instr->entry, &neg->entry); - if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, &cast3->node))) + if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) return false; - hlsl_replace_node(instr, &cond->node); + hlsl_replace_node(instr, cond); return true; } static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg; + struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *cond, *high_bit; struct hlsl_type *type = instr->data_type, *utype; - struct hlsl_ir_expr *cast1, *cast2, *cast3; - struct hlsl_ir_constant *high_bit; + struct hlsl_constant_value high_bit_value; struct hlsl_ir_expr *expr; - struct hlsl_ir_load *cond; unsigned int i; if (instr->type != HLSL_IR_EXPR) @@ -2000,53 +2384,53 @@ static bool lower_int_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_MOD) return 
false; - if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; if (type->base_type != HLSL_TYPE_INT) return false; - utype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_UINT, type->dimx, type->dimy); + utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); - if (!(high_bit = hlsl_new_constant(ctx, type, &instr->loc))) - return false; for (i = 0; i < type->dimx; ++i) - high_bit->value[i].u = 0x80000000; - list_add_before(&instr->entry, &high_bit->node.entry); + high_bit_value.u[i].u = 0x80000000; + if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc))) + return false; + list_add_before(&instr->entry, &high_bit->entry); - if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, &high_bit->node))) + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, arg1, high_bit))) return false; list_add_before(&instr->entry, &and->entry); - if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, instr->loc))) + if (!(abs1 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg1, &instr->loc))) return false; list_add_before(&instr->entry, &abs1->entry); if (!(cast1 = hlsl_new_cast(ctx, abs1, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast1->node.entry); + list_add_before(&instr->entry, &cast1->entry); - if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, instr->loc))) + if (!(abs2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, arg2, &instr->loc))) return false; list_add_before(&instr->entry, &abs2->entry); if (!(cast2 = hlsl_new_cast(ctx, abs2, utype, &instr->loc))) return false; - list_add_before(&instr->entry, &cast2->node.entry); + list_add_before(&instr->entry, &cast2->entry); - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, &cast1->node, &cast2->node))) + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_MOD, cast1, cast2))) return false; list_add_before(&instr->entry, 
&div->entry); if (!(cast3 = hlsl_new_cast(ctx, div, type, &instr->loc))) return false; - list_add_before(&instr->entry, &cast3->node.entry); + list_add_before(&instr->entry, &cast3->entry); - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, &cast3->node, instr->loc))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cast3, &instr->loc))) return false; list_add_before(&instr->entry, &neg->entry); - if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, &cast3->node))) + if (!(cond = hlsl_add_conditional(ctx, &instr->entry, and, neg, cast3))) return false; - hlsl_replace_node(instr, &cond->node); + hlsl_replace_node(instr, cond); return true; } @@ -2063,14 +2447,14 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void if (expr->op != HLSL_OP1_ABS) return false; - if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; if (type->base_type != HLSL_TYPE_INT) return false; arg = expr->operands[0].node; - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, instr->loc))) + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg, &instr->loc))) return false; list_add_before(&instr->entry, &neg->entry); @@ -2080,12 +2464,63 @@ static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void return true; } +static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *arg1, *arg2, *mult, *comps[4] = {0}, *res; + struct hlsl_type *type = instr->data_type; + struct hlsl_ir_expr *expr; + unsigned int i, dimx; + bool is_bool; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + + if (expr->op != HLSL_OP2_DOT) + return false; + + if (type->base_type == HLSL_TYPE_INT || type->base_type == HLSL_TYPE_UINT + || type->base_type == HLSL_TYPE_BOOL) + { + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + 
assert(arg1->data_type->dimx == arg2->data_type->dimx); + dimx = arg1->data_type->dimx; + is_bool = type->base_type == HLSL_TYPE_BOOL; + + if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) + return false; + list_add_before(&instr->entry, &mult->entry); + + for (i = 0; i < dimx; ++i) + { + unsigned int s = hlsl_swizzle_from_writemask(1 << i); + + if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, mult, &instr->loc))) + return false; + list_add_before(&instr->entry, &comps[i]->entry); + } + + res = comps[0]; + for (i = 1; i < dimx; ++i) + { + if (!(res = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_OR : HLSL_OP2_ADD, res, comps[i]))) + return false; + list_add_before(&instr->entry, &res->entry); + } + + hlsl_replace_node(instr, res); + return true; + } + + return false; +} + static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc; + struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one; struct hlsl_type *type = instr->data_type, *btype; - struct hlsl_ir_constant *one; - struct hlsl_ir_load *cond; + struct hlsl_constant_value one_value; struct hlsl_ir_expr *expr; unsigned int i; @@ -2096,17 +2531,17 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr arg2 = expr->operands[1].node; if (expr->op != HLSL_OP2_MOD) return false; - if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR) + if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR) return false; if (type->base_type != HLSL_TYPE_FLOAT) return false; - btype = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_BOOL, type->dimx, type->dimy); + btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy); if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg2, arg1))) return false; list_add_before(&instr->entry, 
&mul1->entry); - if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, instr->loc))) + if (!(neg1 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, &instr->loc))) return false; list_add_before(&instr->entry, &neg1->entry); @@ -2115,20 +2550,20 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr ge->data_type = btype; list_add_before(&instr->entry, &ge->entry); - if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, instr->loc))) + if (!(neg2 = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2, &instr->loc))) return false; list_add_before(&instr->entry, &neg2->entry); if (!(cond = hlsl_add_conditional(ctx, &instr->entry, ge, arg2, neg2))) return false; - if (!(one = hlsl_new_constant(ctx, type, &instr->loc))) - return false; for (i = 0; i < type->dimx; ++i) - one->value[i].f = 1.0f; - list_add_before(&instr->entry, &one->node.entry); + one_value.u[i].f = 1.0f; + if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc))) + return false; + list_add_before(&instr->entry, &one->entry); - if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, &one->node, &cond->node))) + if (!(div = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, one, cond))) return false; list_add_before(&instr->entry, &div->entry); @@ -2136,7 +2571,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return false; list_add_before(&instr->entry, &mul2->entry); - if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, instr->loc))) + if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mul2, &instr->loc))) return false; list_add_before(&instr->entry, &frc->entry); @@ -2144,7 +2579,7 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr hlsl_src_remove(&expr->operands[0]); hlsl_src_remove(&expr->operands[1]); hlsl_src_from_node(&expr->operands[0], frc); - hlsl_src_from_node(&expr->operands[1], &cond->node); + hlsl_src_from_node(&expr->operands[1], cond); return true; } @@ -2155,6 +2590,7 @@ static bool 
dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { case HLSL_IR_CONSTANT: case HLSL_IR_EXPR: + case HLSL_IR_INDEX: case HLSL_IR_LOAD: case HLSL_IR_RESOURCE_LOAD: case HLSL_IR_SWIZZLE: @@ -2204,8 +2640,8 @@ static unsigned int index_instructions(struct hlsl_block *block, unsigned int in if (instr->type == HLSL_IR_IF) { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - index = index_instructions(&iff->then_instrs, index); - index = index_instructions(&iff->else_instrs, index); + index = index_instructions(&iff->then_block, index); + index = index_instructions(&iff->else_block, index); } else if (instr->type == HLSL_IR_LOOP) { @@ -2262,9 +2698,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) continue; regset = hlsl_type_get_regset(var->data_type); - if (var->reg_reservation.type) + if (var->reg_reservation.reg_type && var->regs[regset].bind_count) { - if (var->reg_reservation.type != get_regset_name(regset)) + if (var->reg_reservation.reg_type != get_regset_name(regset)) { struct vkd3d_string_buffer *type_string; @@ -2277,8 +2713,10 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) else { var->regs[regset].allocated = true; - var->regs[regset].id = var->reg_reservation.index; - TRACE("Allocated reserved %s to %c%u.\n", var->name, var->reg_reservation.type, var->reg_reservation.index); + var->regs[regset].id = var->reg_reservation.reg_index; + TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, + var->reg_reservation.reg_index, var->reg_reservation.reg_type, + var->reg_reservation.reg_index + var->regs[regset].bind_count); } } } @@ -2286,9 +2724,9 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) /* Compute the earliest and latest liveness for each variable. In the case that * a variable is accessed inside of a loop, we promote its liveness to extend - * to at least the range of the entire loop. 
Note that we don't need to do this - * for anonymous nodes, since there's currently no way to use a node which was - * calculated in an earlier iteration of the loop. */ + * to at least the range of the entire loop. We also do this for nodes, so that + * nodes produced before the loop have their temp register protected from being + * overridden after the last read within an iteration. */ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop_first, unsigned int loop_last) { struct hlsl_ir_node *instr; @@ -2296,7 +2734,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { - const unsigned int var_last_read = loop_last ? max(instr->index, loop_last) : instr->index; + const unsigned int last_read = loop_last ? max(instr->index, loop_last) : instr->index; switch (instr->type) { @@ -2311,9 +2749,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop var = store->lhs.var; if (!var->first_write) var->first_write = loop_first ? 
min(instr->index, loop_first) : instr->index; - store->rhs.node->last_read = instr->index; + store->rhs.node->last_read = last_read; if (store->lhs.offset.node) - store->lhs.offset.node->last_read = instr->index; + store->lhs.offset.node->last_read = last_read; break; } case HLSL_IR_EXPR: @@ -2322,16 +2760,16 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop unsigned int i; for (i = 0; i < ARRAY_SIZE(expr->operands) && expr->operands[i].node; ++i) - expr->operands[i].node->last_read = instr->index; + expr->operands[i].node->last_read = last_read; break; } case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - compute_liveness_recurse(&iff->then_instrs, loop_first, loop_last); - compute_liveness_recurse(&iff->else_instrs, loop_first, loop_last); - iff->condition.node->last_read = instr->index; + compute_liveness_recurse(&iff->then_block, loop_first, loop_last); + compute_liveness_recurse(&iff->else_block, loop_first, loop_last); + iff->condition.node->last_read = last_read; break; } case HLSL_IR_LOAD: @@ -2339,9 +2777,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_load *load = hlsl_ir_load(instr); var = load->src.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->src.offset.node) - load->src.offset.node->last_read = instr->index; + load->src.offset.node->last_read = last_read; break; } case HLSL_IR_LOOP: @@ -2357,22 +2795,30 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); var = load->resource.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->resource.offset.node) - load->resource.offset.node->last_read = instr->index; + load->resource.offset.node->last_read = last_read; if ((var = load->sampler.var)) { - var->last_read = 
max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (load->sampler.offset.node) - load->sampler.offset.node->last_read = instr->index; + load->sampler.offset.node->last_read = last_read; } - load->coords.node->last_read = instr->index; + load->coords.node->last_read = last_read; if (load->texel_offset.node) - load->texel_offset.node->last_read = instr->index; + load->texel_offset.node->last_read = last_read; if (load->lod.node) - load->lod.node->last_read = instr->index; + load->lod.node->last_read = last_read; + if (load->ddx.node) + load->ddx.node->last_read = last_read; + if (load->ddy.node) + load->ddy.node->last_read = last_read; + if (load->sample_index.node) + load->sample_index.node->last_read = last_read; + if (load->cmp.node) + load->cmp.node->last_read = last_read; break; } case HLSL_IR_RESOURCE_STORE: @@ -2380,18 +2826,26 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); var = store->resource.var; - var->last_read = max(var->last_read, var_last_read); + var->last_read = max(var->last_read, last_read); if (store->resource.offset.node) - store->resource.offset.node->last_read = instr->index; - store->coords.node->last_read = instr->index; - store->value.node->last_read = instr->index; + store->resource.offset.node->last_read = last_read; + store->coords.node->last_read = last_read; + store->value.node->last_read = last_read; break; } case HLSL_IR_SWIZZLE: { struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); - swizzle->val.node->last_read = instr->index; + swizzle->val.node->last_read = last_read; + break; + } + case HLSL_IR_INDEX: + { + struct hlsl_ir_index *index = hlsl_ir_index(instr); + + index->val.node->last_read = last_read; + index->idx.node->last_read = last_read; break; } case HLSL_IR_CONSTANT: @@ -2426,127 +2880,142 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl 
compute_liveness_recurse(&entry_func->body, 0, 0); } -struct liveness +struct register_allocator { - size_t size; - uint32_t reg_count; - struct + size_t count, capacity; + + /* Highest register index that has been allocated. + * Used to declare sm4 temp count. */ + uint32_t max_reg; + + struct allocation { - /* 0 if not live yet. */ - unsigned int last_read; - } *regs; + uint32_t reg; + unsigned int writemask; + unsigned int first_write, last_read; + } *allocations; }; -static unsigned int get_available_writemask(struct liveness *liveness, - unsigned int first_write, unsigned int component_idx, unsigned int reg_size) +static unsigned int get_available_writemask(const struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, uint32_t reg_idx) { - unsigned int i, writemask = 0, count = 0; + unsigned int writemask = VKD3DSP_WRITEMASK_ALL; + size_t i; - for (i = 0; i < 4; ++i) + for (i = 0; i < allocator->count; ++i) { - if (liveness->regs[component_idx + i].last_read <= first_write) - { - writemask |= 1u << i; - if (++count == reg_size) - return writemask; - } + const struct allocation *allocation = &allocator->allocations[i]; + + /* We do not overlap if first write == last read: + * this is the case where we are allocating the result of that + * expression, e.g. "add r0, r0, r1". 
*/ + + if (allocation->reg == reg_idx + && first_write < allocation->last_read && last_read > allocation->first_write) + writemask &= ~allocation->writemask; + + if (!writemask) + break; } - return 0; + return writemask; } -static bool resize_liveness(struct hlsl_ctx *ctx, struct liveness *liveness, size_t new_count) +static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, + uint32_t reg_idx, unsigned int writemask, unsigned int first_write, unsigned int last_read) { - size_t old_capacity = liveness->size; + struct allocation *allocation; - if (!hlsl_array_reserve(ctx, (void **)&liveness->regs, &liveness->size, new_count, sizeof(*liveness->regs))) - return false; + if (!hlsl_array_reserve(ctx, (void **)&allocator->allocations, &allocator->capacity, + allocator->count + 1, sizeof(*allocator->allocations))) + return; - if (liveness->size > old_capacity) - memset(liveness->regs + old_capacity, 0, (liveness->size - old_capacity) * sizeof(*liveness->regs)); - return true; + allocation = &allocator->allocations[allocator->count++]; + allocation->reg = reg_idx; + allocation->writemask = writemask; + allocation->first_write = first_write; + allocation->last_read = last_read; + + allocator->max_reg = max(allocator->max_reg, reg_idx); } /* reg_size is the number of register components to be reserved, while component_count is the number * of components for the register's writemask. In SM1, floats and vectors allocate the whole * register, even if they don't use it completely. 
*/ -static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct liveness *liveness, +static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, unsigned int reg_size, unsigned int component_count) { - unsigned int component_idx, writemask, i; struct hlsl_reg ret = {0}; + unsigned int writemask; + uint32_t reg_idx; assert(component_count <= reg_size); - for (component_idx = 0; component_idx < liveness->size; component_idx += 4) + for (reg_idx = 0;; ++reg_idx) { - if ((writemask = get_available_writemask(liveness, first_write, component_idx, reg_size))) + writemask = get_available_writemask(allocator, first_write, last_read, reg_idx); + + if (vkd3d_popcount(writemask) >= reg_size) + { + writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1); break; + } } - if (component_idx == liveness->size) - { - if (!resize_liveness(ctx, liveness, component_idx + 4)) - return ret; - writemask = (1u << reg_size) - 1; - } - for (i = 0; i < 4; ++i) - { - if (writemask & (1u << i)) - liveness->regs[component_idx + i].last_read = last_read; - } - ret.id = component_idx / 4; + + record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); + + ret.id = reg_idx; + ret.bind_count = 1; ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); ret.allocated = true; - liveness->reg_count = max(liveness->reg_count, ret.id + 1); return ret; } -static bool is_range_available(struct liveness *liveness, unsigned int first_write, - unsigned int component_idx, unsigned int reg_size) +static bool is_range_available(const struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) { - unsigned int i; + uint32_t i; - for (i = 0; i < reg_size; i += 4) + for (i = 0; i < (reg_size / 4); ++i) { - if (!get_available_writemask(liveness, first_write, component_idx + i, 4)) + if 
(get_available_writemask(allocator, first_write, last_read, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) return false; } return true; } -static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct liveness *liveness, +static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, unsigned int reg_size) { - unsigned int i, component_idx; struct hlsl_reg ret = {0}; + uint32_t reg_idx; + unsigned int i; - for (component_idx = 0; component_idx < liveness->size; component_idx += 4) + for (reg_idx = 0;; ++reg_idx) { - if (is_range_available(liveness, first_write, component_idx, - min(reg_size, liveness->size - component_idx))) + if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size)) break; } - if (!resize_liveness(ctx, liveness, component_idx + reg_size)) - return ret; - for (i = 0; i < reg_size; ++i) - liveness->regs[component_idx + i].last_read = last_read; - ret.id = component_idx / 4; + for (i = 0; i < reg_size / 4; ++i) + record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); + + ret.id = reg_idx; + ret.bind_count = align(reg_size, 4) / 4; ret.allocated = true; - liveness->reg_count = max(liveness->reg_count, ret.id + align(reg_size, 4)); return ret; } -static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct liveness *liveness, +static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, const struct hlsl_type *type) { unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; - if (type->type <= HLSL_CLASS_VECTOR) - return allocate_register(ctx, liveness, first_write, last_read, reg_size, type->dimx); + if (type->class <= HLSL_CLASS_VECTOR) + return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx); else - return allocate_range(ctx, liveness, 
first_write, last_read, reg_size); + return allocate_range(ctx, allocator, first_write, last_read, reg_size); } static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) @@ -2565,14 +3034,99 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); } -static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct liveness *liveness) +static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct hlsl_ir_var *var; + enum hlsl_regset regset; + unsigned int index; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + + load = hlsl_ir_resource_load(instr); + var = load->resource.var; + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + + if (regset == HLSL_REGSET_SAMPLERS) + { + enum hlsl_sampler_dim dim; + + assert(!load->sampler.var); + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + dim = var->objects_usage[regset][index].sampler_dim; + if (dim != load->sampling_dim) + { + if (dim == HLSL_SAMPLER_DIM_GENERIC) + { + var->objects_usage[regset][index].first_sampler_dim_loc = instr->loc; + } + else + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, + "Inconsistent generic sampler usage dimension."); + hlsl_note(ctx, &var->objects_usage[regset][index].first_sampler_dim_loc, + VKD3D_SHADER_LOG_ERROR, "First use is here."); + return false; + } + } + var->objects_usage[regset][index].used = true; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + } + else + { + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + var->objects_usage[regset][index].used = true; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + 
+ if (load->sampler.var) + { + var = load->sampler.var; + if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) + return false; + + var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; + } + } + + return false; +} + +static void calculate_resource_register_counts(struct hlsl_ctx *ctx) +{ + struct hlsl_ir_var *var; + struct hlsl_type *type; + unsigned int i, k; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + type = var->data_type; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + for (i = 0; i < type->reg_size[k]; ++i) + { + /* Samplers are only allocated until the last used one. */ + if (var->objects_usage[k][i].used) + var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; + } + } + } +} + +static void allocate_variable_temp_register(struct hlsl_ctx *ctx, + struct hlsl_ir_var *var, struct register_allocator *allocator) { if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) return; if (!var->regs[HLSL_REGSET_NUMERIC].allocated && var->last_read) { - var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, liveness, + var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator, var->first_write, var->last_read, var->data_type); TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r', @@ -2580,15 +3134,20 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir } } -static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) +static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, + struct hlsl_block *block, struct register_allocator *allocator) { struct hlsl_ir_node *instr; LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { + /* In SM4 all constants are inlined. 
*/ + if (ctx->profile->major_version >= 4 && instr->type == HLSL_IR_CONSTANT) + continue; + if (!instr->reg.allocated && instr->last_read) { - instr->reg = allocate_numeric_registers_for_type(ctx, liveness, instr->index, instr->last_read, + instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, instr->data_type); TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); @@ -2599,8 +3158,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - allocate_temp_registers_recurse(ctx, &iff->then_instrs, liveness); - allocate_temp_registers_recurse(ctx, &iff->else_instrs, liveness); + allocate_temp_registers_recurse(ctx, &iff->then_block, allocator); + allocate_temp_registers_recurse(ctx, &iff->else_block, allocator); break; } @@ -2609,21 +3168,21 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl struct hlsl_ir_load *load = hlsl_ir_load(instr); /* We need to at least allocate a variable for undefs. * FIXME: We should probably find a way to remove them instead. 
*/ - allocate_variable_temp_register(ctx, load->src.var, liveness); + allocate_variable_temp_register(ctx, load->src.var, allocator); break; } case HLSL_IR_LOOP: { struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); - allocate_temp_registers_recurse(ctx, &loop->body, liveness); + allocate_temp_registers_recurse(ctx, &loop->body, allocator); break; } case HLSL_IR_STORE: { struct hlsl_ir_store *store = hlsl_ir_store(instr); - allocate_variable_temp_register(ctx, store->lhs.var, liveness); + allocate_variable_temp_register(ctx, store->lhs.var, allocator); break; } @@ -2633,7 +3192,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_bl } } -static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct liveness *liveness) +static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + struct hlsl_block *block, struct register_allocator *allocator) { struct hlsl_constant_defs *defs = &ctx->constant_defs; struct hlsl_ir_node *instr; @@ -2649,7 +3209,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b unsigned int x, y, i, writemask, end_reg; unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; - constant->reg = allocate_numeric_registers_for_type(ctx, liveness, 1, UINT_MAX, type); + constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); if (!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, @@ -2662,7 +3222,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b defs->count = end_reg; } - assert(type->type <= HLSL_CLASS_LAST_NUMERIC); + assert(type->class <= HLSL_CLASS_LAST_NUMERIC); if (!(writemask = constant->reg.writemask)) writemask = (1u << type->dimx) - 1; @@ -2671,12 +3231,12 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b { for (x = 0, i = 0; x < 
4; ++x) { - const union hlsl_constant_value *value; + const union hlsl_constant_value_component *value; float f; if (!(writemask & (1u << x))) continue; - value = &constant->value[i++]; + value = &constant->value.u[i++]; switch (type->base_type) { @@ -2714,15 +3274,15 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - allocate_const_registers_recurse(ctx, &iff->then_instrs, liveness); - allocate_const_registers_recurse(ctx, &iff->else_instrs, liveness); + allocate_const_registers_recurse(ctx, &iff->then_block, allocator); + allocate_const_registers_recurse(ctx, &iff->else_block, allocator); break; } case HLSL_IR_LOOP: { struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); - allocate_const_registers_recurse(ctx, &loop->body, liveness); + allocate_const_registers_recurse(ctx, &loop->body, allocator); break; } @@ -2734,10 +3294,10 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { - struct liveness liveness = {0}; + struct register_allocator allocator = {0}; struct hlsl_ir_var *var; - allocate_const_registers_recurse(ctx, &entry_func->body, &liveness); + allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -2748,12 +3308,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi if (reg_size == 0) continue; - var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &liveness, + var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, 1, UINT_MAX, var->data_type); TRACE("Allocated %s to %s.\n", var->name, debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } } + + vkd3d_free(allocator.allocations); } /* Simple greedy temporary register allocation pass 
that just assigns a unique @@ -2762,15 +3324,33 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi * does not handle constants. */ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { - struct liveness liveness = {0}; - allocate_temp_registers_recurse(ctx, &entry_func->body, &liveness); - ctx->temp_count = liveness.reg_count; - vkd3d_free(liveness.regs); + struct register_allocator allocator = {0}; + + /* ps_1_* outputs are special and go in temp register 0. */ + if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + { + size_t i; + + for (i = 0; i < entry_func->parameters.count; ++i) + { + const struct hlsl_ir_var *var = entry_func->parameters.vars[i]; + + if (var->is_output_semantic) + { + record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); + break; + } + } + } + + allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); + ctx->temp_count = allocator.max_reg + 1; + vkd3d_free(allocator.allocations); } static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) { - static const char *shader_names[] = + static const char *const shader_names[] = { [VKD3D_SHADER_TYPE_PIXEL] = "Pixel", [VKD3D_SHADER_TYPE_VERTEX] = "Vertex", @@ -2791,7 +3371,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var D3DDECLUSAGE usage; uint32_t usage_idx; - if (!hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) + /* ps_1_* outputs are special and go in temp register 0. 
*/ + if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + return; + + builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, ®); + if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); @@ -2800,8 +3385,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var if ((!output && !var->last_read) || (output && !var->first_write)) return; - - builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &type, ®); } else { @@ -2827,6 +3410,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var { var->regs[HLSL_REGSET_NUMERIC].allocated = true; var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; + var->regs[HLSL_REGSET_NUMERIC].bind_count = 1; var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 
'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); @@ -2853,23 +3437,117 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) { - if (buffer->used_size && buffer->reservation.type == 'b' && buffer->reservation.index == index) + if (buffer->used_size && buffer->reservation.reg_type == 'b' && buffer->reservation.reg_index == index) return buffer; } return NULL; } -static void calculate_buffer_offset(struct hlsl_ir_var *var) +static void calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) { + unsigned int var_reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + enum hlsl_type_class var_class = var->data_type->class; struct hlsl_buffer *buffer = var->buffer; - buffer->size = hlsl_type_get_sm4_offset(var->data_type, buffer->size); + if (var->reg_reservation.offset_type == 'c') + { + if (var->reg_reservation.offset_index % 4) + { + if (var_class == HLSL_CLASS_MATRIX) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with matrix types must be aligned with the beginning of a register."); + } + else if (var_class == HLSL_CLASS_ARRAY) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with array types must be aligned with the beginning of a register."); + } + else if (var_class == HLSL_CLASS_STRUCT) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with struct types must be aligned with the beginning of a register."); + } + else if (var_class == HLSL_CLASS_VECTOR) + { + unsigned int aligned_offset = hlsl_type_get_sm4_offset(var->data_type, var->reg_reservation.offset_index); + + if (var->reg_reservation.offset_index != aligned_offset) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with vector types cannot span 
multiple registers."); + } + } + var->buffer_offset = var->reg_reservation.offset_index; + } + else + { + var->buffer_offset = hlsl_type_get_sm4_offset(var->data_type, buffer->size); + } - var->buffer_offset = buffer->size; TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); - buffer->size += var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + buffer->size = max(buffer->size, var->buffer_offset + var_reg_size); if (var->last_read) - buffer->used_size = buffer->size; + buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size); +} + +static void validate_buffer_offsets(struct hlsl_ctx *ctx) +{ + struct hlsl_ir_var *var1, *var2; + struct hlsl_buffer *buffer; + + LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) + continue; + + buffer = var1->buffer; + if (!buffer->used_size) + continue; + + LIST_FOR_EACH_ENTRY(var2, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int var1_reg_size, var2_reg_size; + + if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) + continue; + + if (var1 == var2 || var1->buffer != var2->buffer) + continue; + + /* This is to avoid reporting the error twice for the same pair of overlapping variables. 
*/ + if (strcmp(var1->name, var2->name) >= 0) + continue; + + var1_reg_size = var1->data_type->reg_size[HLSL_REGSET_NUMERIC]; + var2_reg_size = var2->data_type->reg_size[HLSL_REGSET_NUMERIC]; + + if (var1->buffer_offset < var2->buffer_offset + var2_reg_size + && var2->buffer_offset < var1->buffer_offset + var1_reg_size) + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid packoffset() reservation: Variables %s and %s overlap.", + var1->name, var2->name); + } + } + + LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + buffer = var1->buffer; + if (!buffer || buffer == ctx->globals_buffer) + continue; + + if (var1->reg_reservation.offset_type + || (var1->data_type->class == HLSL_CLASS_OBJECT && var1->reg_reservation.reg_type)) + buffer->manually_packed_elements = true; + else + buffer->automatically_packed_elements = true; + + if (buffer->manually_packed_elements && buffer->automatically_packed_elements) + { + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() must be specified for all the buffer elements, or none of them."); + break; + } + } } static void allocate_buffers(struct hlsl_ctx *ctx) @@ -2880,15 +3558,17 @@ static void allocate_buffers(struct hlsl_ctx *ctx) LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->data_type->type != HLSL_CLASS_OBJECT) + if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) { if (var->is_param) var->buffer = ctx->params_buffer; - calculate_buffer_offset(var); + calculate_buffer_offset(ctx, var); } } + validate_buffer_offsets(ctx); + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) { if (!buffer->used_size) @@ -2896,28 +3576,30 @@ static void allocate_buffers(struct hlsl_ctx *ctx) if (buffer->type == HLSL_BUFFER_CONSTANT) { - if (buffer->reservation.type == 'b') + if (buffer->reservation.reg_type == 'b') { - const struct hlsl_buffer 
*reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.index); + const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.reg_index); if (reserved_buffer && reserved_buffer != buffer) { hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple buffers bound to cb%u.", buffer->reservation.index); + "Multiple buffers bound to cb%u.", buffer->reservation.reg_index); hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, - "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.index); + "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.reg_index); } - buffer->reg.id = buffer->reservation.index; + buffer->reg.id = buffer->reservation.reg_index; + buffer->reg.bind_count = 1; buffer->reg.allocated = true; TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); } - else if (!buffer->reservation.type) + else if (!buffer->reservation.reg_type) { while (get_reserved_buffer(ctx, index)) ++index; buffer->reg.id = index; + buffer->reg.bind_count = 1; buffer->reg.allocated = true; TRACE("Allocated %s to cb%u.\n", buffer->name, index); ++index; @@ -2939,13 +3621,29 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum uint32_t index) { const struct hlsl_ir_var *var; + unsigned int start, count; LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) { - if (!var->regs[regset].allocated) + if (var->reg_reservation.reg_type == get_regset_name(regset) + && var->data_type->reg_size[regset]) + { + /* Vars with a reservation prevent non-reserved vars from being + * bound there even if the reserved vars aren't used. 
*/ + start = var->reg_reservation.reg_index; + count = var->data_type->reg_size[regset]; + } + else if (var->regs[regset].allocated) + { + start = var->regs[regset].id; + count = var->regs[regset].bind_count; + } + else + { continue; + } - if (index == var->regs[regset].id) + if (start <= index && index < start + count) return var; } return NULL; @@ -2956,7 +3654,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) char regset_name = get_regset_name(regset); struct hlsl_ir_var *var; uint32_t min_index = 0; - uint32_t index; if (regset == HLSL_REGSET_UAVS) { @@ -2968,19 +3665,17 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) } } - index = min_index; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var->last_read || !var->data_type->reg_size[regset]) + unsigned int count = var->regs[regset].bind_count; + + if (count == 0) continue; if (var->regs[regset].allocated) { - const struct hlsl_ir_var *reserved_object; - unsigned int index = var->regs[regset].id; - - reserved_object = get_allocated_object(ctx, regset, index); + const struct hlsl_ir_var *reserved_object, *last_reported = NULL; + unsigned int index, i; if (var->regs[regset].id < min_index) { @@ -2988,28 +3683,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "UAV index (%u) must be higher than the maximum render target index (%u).", var->regs[regset].id, min_index - 1); + continue; } - else if (reserved_object && reserved_object != var) + + for (i = 0; i < count; ++i) { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple objects bound to %c%u.", regset_name, index); - hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, - "Object '%s' is already bound to %c%u.", reserved_object->name, - regset_name, index); - } + index = var->regs[regset].id + i; - 
var->regs[regset].id = var->reg_reservation.index; - var->regs[regset].allocated = true; - TRACE("Allocated reserved %s to %c%u.\n", var->name, regset_name, var->regs[regset].id); + reserved_object = get_allocated_object(ctx, regset, index); + if (reserved_object && reserved_object != var && reserved_object != last_reported) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, + "Multiple variables bound to %c%u.", regset_name, index); + hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, + "Variable '%s' is already bound to %c%u.", reserved_object->name, + regset_name, index); + last_reported = reserved_object; + } + } } else { - while (get_allocated_object(ctx, regset, index)) + unsigned int index = min_index; + unsigned int available = 0; + + while (available < count) + { + if (get_allocated_object(ctx, regset, index)) + available = 0; + else + ++available; ++index; + } + index -= count; var->regs[regset].id = index; var->regs[regset].allocated = true; - TRACE("Allocated object to %c%u.\n", regset_name, index); + TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, + index + count); ++index; } } @@ -3034,12 +3745,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return false; /* We should always have generated a cast to UINT. 
*/ - assert(path_node->data_type->type == HLSL_CLASS_SCALAR + assert(path_node->data_type->class == HLSL_CLASS_SCALAR && path_node->data_type->base_type == HLSL_TYPE_UINT); - idx = hlsl_ir_constant(path_node)->value[0].u; + idx = hlsl_ir_constant(path_node)->value.u[0].u; - switch (type->type) + switch (type->class) { case HLSL_CLASS_VECTOR: if (idx >= type->dimx) @@ -3090,6 +3801,55 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return true; } +bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index) +{ + struct hlsl_type *type = deref->var->data_type; + unsigned int i; + + assert(regset <= HLSL_REGSET_LAST_OBJECT); + + *index = 0; + + for (i = 0; i < deref->path_len; ++i) + { + struct hlsl_ir_node *path_node = deref->path[i].node; + unsigned int idx = 0; + + assert(path_node); + if (path_node->type != HLSL_IR_CONSTANT) + return false; + + /* We should always have generated a cast to UINT. */ + assert(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->base_type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value.u[0].u; + + switch (type->class) + { + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) + return false; + + *index += idx * type->e.array.type->reg_size[regset]; + break; + + case HLSL_CLASS_STRUCT: + *index += type->e.record.fields[idx].reg_offset[regset]; + break; + + default: + vkd3d_unreachable(); + } + + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } + + assert(type->reg_size[regset] == 1); + return true; +} + bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node; @@ -3102,13 +3862,13 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref } /* We should always have generated a cast to UINT. 
*/ - assert(offset_node->data_type->type == HLSL_CLASS_SCALAR + assert(offset_node->data_type->class == HLSL_CLASS_SCALAR && offset_node->data_type->base_type == HLSL_TYPE_UINT); if (offset_node->type != HLSL_IR_CONSTANT) return false; - *offset = hlsl_ir_constant(offset_node)->value[0].u; + *offset = hlsl_ir_constant(offset_node)->value.u[0].u; size = deref->var->data_type->reg_size[deref->offset_regset]; if (*offset >= size) @@ -3170,7 +3930,7 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a const struct hlsl_type *type = instr->data_type; const struct hlsl_ir_constant *constant; - if (type->type != HLSL_CLASS_SCALAR + if (type->class != HLSL_CLASS_SCALAR || (type->base_type != HLSL_TYPE_INT && type->base_type != HLSL_TYPE_UINT)) { struct vkd3d_string_buffer *string; @@ -3190,15 +3950,34 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } constant = hlsl_ir_constant(instr); - if ((type->base_type == HLSL_TYPE_INT && constant->value[0].i <= 0) - || (type->base_type == HLSL_TYPE_UINT && !constant->value[0].u)) + if ((type->base_type == HLSL_TYPE_INT && constant->value.u[0].i <= 0) + || (type->base_type == HLSL_TYPE_UINT && !constant->value.u[0].u)) hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT, "Thread count must be a positive integer."); - ctx->thread_count[i] = constant->value[0].u; + ctx->thread_count[i] = constant->value.u[0].u; } } +static bool type_has_object_components(struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_OBJECT) + return true; + if (type->class == HLSL_CLASS_ARRAY) + return type_has_object_components(type->e.array.type); + if (type->class == HLSL_CLASS_STRUCT) + { + unsigned int i; + + for (i = 0; i < type->e.record.field_count; ++i) + { + if (type_has_object_components(type->e.record.fields[i].type)) + return true; + } + } + return false; +} + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum 
vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) { @@ -3209,10 +3988,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry unsigned int i; bool progress; - list_move_head(&body->instrs, &ctx->static_initializers); + list_move_head(&body->instrs, &ctx->static_initializers.instrs); memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); - transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); + hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); vkd3d_free(recursive_call_ctx.backtrace); /* Avoid going into an infinite loop when processing call instructions. @@ -3222,7 +4001,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_return(ctx, entry_func, body, false); - while (transform_ir(ctx, lower_calls, body, NULL)); + while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); + + hlsl_transform_ir(ctx, lower_index_loads, body, NULL); LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { @@ -3234,15 +4015,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry { var = entry_func->parameters.vars[i]; - if (var->data_type->type == HLSL_CLASS_OBJECT || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) { prepend_uniform_copy(ctx, &body->instrs, var); } else { - if (var->data_type->type != HLSL_CLASS_STRUCT && !var->semantic.name) + if (type_has_object_components(var->data_type)) + hlsl_fixme(ctx, &var->loc, "Prepend uniform copies for object components within structs."); + + if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT + && !var->semantic.name) + { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Parameter \"%s\" is missing a semantic.", var->name); + var->semantic.reported_missing = true; + } if (var->storage_modifiers & HLSL_STORAGE_IN) 
prepend_input_var_copy(ctx, &body->instrs, var); @@ -3252,7 +4040,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } if (entry_func->return_var) { - if (entry_func->return_var->data_type->type != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) + if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT && !entry_func->return_var->semantic.name) hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); @@ -3274,60 +4062,71 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); - transform_ir(ctx, lower_broadcasts, body, NULL); - while (transform_ir(ctx, fold_redundant_casts, body, NULL)); + hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); + while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); do { - progress = transform_ir(ctx, split_array_copies, body, NULL); - progress |= transform_ir(ctx, split_struct_copies, body, NULL); + progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); + progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); } while (progress); - transform_ir(ctx, split_matrix_copies, body, NULL); - - transform_ir(ctx, lower_narrowing_casts, body, NULL); - transform_ir(ctx, lower_casts_to_bool, body, NULL); - transform_ir(ctx, lower_int_division, body, NULL); - transform_ir(ctx, lower_int_modulus, body, NULL); - transform_ir(ctx, lower_int_abs, body, NULL); - transform_ir(ctx, lower_float_modulus, body, NULL); + hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); + + hlsl_transform_ir(ctx, lower_narrowing_casts, body, NULL); + hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); + hlsl_transform_ir(ctx, lower_int_dot, body, NULL); + hlsl_transform_ir(ctx, 
lower_int_division, body, NULL); + hlsl_transform_ir(ctx, lower_int_modulus, body, NULL); + hlsl_transform_ir(ctx, lower_int_abs, body, NULL); + hlsl_transform_ir(ctx, lower_float_modulus, body, NULL); + hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); do { - progress = transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - progress |= transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); - progress |= copy_propagation_execute(ctx, body); - progress |= transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= transform_ir(ctx, remove_trivial_swizzles, body, NULL); + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); + progress |= hlsl_copy_propagation_execute(ctx, body); + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); } while (progress); + hlsl_transform_ir(ctx, lower_nonconstant_vector_derefs, body, NULL); + hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); + hlsl_transform_ir(ctx, lower_int_dot, body, NULL); + if (profile->major_version < 4) { - transform_ir(ctx, lower_division, body, NULL); - transform_ir(ctx, lower_sqrt, body, NULL); - transform_ir(ctx, lower_dot, body, NULL); + hlsl_transform_ir(ctx, lower_division, body, NULL); + hlsl_transform_ir(ctx, lower_sqrt, body, NULL); + hlsl_transform_ir(ctx, lower_dot, body, NULL); + hlsl_transform_ir(ctx, lower_round, body, NULL); } if (profile->major_version < 2) { - transform_ir(ctx, lower_abs, body, NULL); + hlsl_transform_ir(ctx, lower_abs, body, NULL); } - transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); /* TODO: move forward, remove when no longer needed */ - transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); 
- while (transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); + transform_derefs(ctx, replace_deref_path_with_offset, body); + while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); do compute_liveness(ctx, entry_func); - while (transform_ir(ctx, dce, body, NULL)); + while (hlsl_transform_ir(ctx, dce, body, NULL)); compute_liveness(ctx, entry_func); if (TRACE_ON()) rb_for_each_entry(&ctx->functions, dump_function, ctx); + calculate_resource_register_counts(ctx); + allocate_register_reservations(ctx); + allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4) { diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index 3210bbd5712..301113c8477 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -22,7 +22,49 @@ #include "hlsl.h" -static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) +static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = fabsf(src->value.u[k].f); + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = fabs(src->value.u[k].d); + break; + + case HLSL_TYPE_INT: + /* C's abs(INT_MIN) is undefined, but HLSL evaluates this to INT_MIN */ + if (src->value.u[k].i == INT_MIN) + dst->u[k].i = INT_MIN; + else + dst->u[k].i = abs(src->value.u[k].i); + break; + + case HLSL_TYPE_UINT: + dst->u[k].u = src->value.u[k].u; + break; + + default: + FIXME("Fold abs() for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; +} + +static bool fold_cast(struct hlsl_ctx *ctx, struct 
hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { unsigned int k; uint32_t u; @@ -30,11 +72,11 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct double d; float f; - if (dst->node.data_type->dimx != src->node.data_type->dimx - || dst->node.data_type->dimy != src->node.data_type->dimy) + if (dst_type->dimx != src->node.data_type->dimx + || dst_type->dimy != src->node.data_type->dimy) { FIXME("Cast from %s to %s.\n", debug_hlsl_type(ctx, src->node.data_type), - debug_hlsl_type(ctx, dst->node.data_type)); + debug_hlsl_type(ctx, dst_type)); return false; } @@ -44,61 +86,61 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - u = src->value[k].f; - i = src->value[k].f; - f = src->value[k].f; - d = src->value[k].f; + u = src->value.u[k].f; + i = src->value.u[k].f; + f = src->value.u[k].f; + d = src->value.u[k].f; break; case HLSL_TYPE_DOUBLE: - u = src->value[k].d; - i = src->value[k].d; - f = src->value[k].d; - d = src->value[k].d; + u = src->value.u[k].d; + i = src->value.u[k].d; + f = src->value.u[k].d; + d = src->value.u[k].d; break; case HLSL_TYPE_INT: - u = src->value[k].i; - i = src->value[k].i; - f = src->value[k].i; - d = src->value[k].i; + u = src->value.u[k].i; + i = src->value.u[k].i; + f = src->value.u[k].i; + d = src->value.u[k].i; break; case HLSL_TYPE_UINT: - u = src->value[k].u; - i = src->value[k].u; - f = src->value[k].u; - d = src->value[k].u; + u = src->value.u[k].u; + i = src->value.u[k].u; + f = src->value.u[k].u; + d = src->value.u[k].u; break; case HLSL_TYPE_BOOL: - u = !!src->value[k].u; - i = !!src->value[k].u; - f = !!src->value[k].u; - d = !!src->value[k].u; + u = !!src->value.u[k].u; + i = !!src->value.u[k].u; + f = !!src->value.u[k].u; + d = !!src->value.u[k].u; break; default: vkd3d_unreachable(); } - switch (dst->node.data_type->base_type) + switch (dst_type->base_type) { case 
HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = f; + dst->u[k].f = f; break; case HLSL_TYPE_DOUBLE: - dst->value[k].d = d; + dst->u[k].d = d; break; case HLSL_TYPE_INT: - dst->value[k].i = i; + dst->u[k].i = i; break; case HLSL_TYPE_UINT: - dst->value[k].u = u; + dst->u[k].u = u; break; case HLSL_TYPE_BOOL: @@ -110,9 +152,10 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct return true; } -static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src) +static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k; assert(type == src->node.data_type->base_type); @@ -123,30 +166,30 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = -src->value[k].f; + dst->u[k].f = -src->value.u[k].f; break; case HLSL_TYPE_DOUBLE: - dst->value[k].d = -src->value[k].d; + dst->u[k].d = -src->value.u[k].d; break; case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = -src->value[k].u; + dst->u[k].u = -src->value.u[k].u; break; default: - FIXME("Fold negation for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold negation for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; } -static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct hlsl_ir_constant *src1, - struct hlsl_ir_constant *src2) +static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k; assert(type == 
src1->node.data_type->base_type); @@ -158,32 +201,32 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, struct { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = src1->value[k].f + src2->value[k].f; + dst->u[k].f = src1->value.u[k].f + src2->value.u[k].f; break; case HLSL_TYPE_DOUBLE: - dst->value[k].d = src1->value[k].d + src2->value[k].d; + dst->u[k].d = src1->value.u[k].d + src2->value.u[k].d; break; /* Handling HLSL_TYPE_INT through the unsigned field to avoid * undefined behavior with signed integers in C. */ case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u + src2->value[k].u; + dst->u[k].u = src1->value.u[k].u + src2->value.u[k].u; break; default: - FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold addition for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; } -static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k; assert(type == src1->node.data_type->base_type); @@ -195,32 +238,32 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].f = src1->value[k].f * src2->value[k].f; + dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; break; case HLSL_TYPE_DOUBLE: - dst->value[k].d = src1->value[k].d * src2->value[k].d; + dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; break; case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u * src2->value[k].u; + dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; break; default: 
- FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; } -static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { unsigned int k; - assert(dst->node.data_type->base_type == HLSL_TYPE_BOOL); + assert(dst_type->base_type == HLSL_TYPE_BOOL); assert(src1->node.data_type->base_type == src2->node.data_type->base_type); for (k = 0; k < 4; ++k) @@ -229,270 +272,270 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - dst->value[k].u = src1->value[k].f != src2->value[k].f; + dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; break; case HLSL_TYPE_DOUBLE: - dst->value[k].u = src1->value[k].d != src2->value[k].d; + dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; break; case HLSL_TYPE_INT: case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: - dst->value[k].u = src1->value[k].u != src2->value[k].u; + dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; break; default: vkd3d_unreachable(); } - dst->value[k].u *= ~0u; + dst->u[k].u *= ~0u; } return true; } -static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k; assert(type == 
src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type); - for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - if (ctx->profile->major_version >= 4 && src2->value[k].f == 0) + if (ctx->profile->major_version >= 4 && src2->value.u[k].f == 0) { - hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, "Floating point division by zero."); } - dst->value[k].f = src1->value[k].f / src2->value[k].f; - if (ctx->profile->major_version < 4 && !isfinite(dst->value[k].f)) + dst->u[k].f = src1->value.u[k].f / src2->value.u[k].f; + if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Infinities and NaNs are not allowed by the shader model."); } break; case HLSL_TYPE_DOUBLE: - if (src2->value[k].d == 0) + if (src2->value.u[k].d == 0) { - hlsl_warning(ctx, &dst->node.loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, "Floating point division by zero."); } - dst->value[k].d = src1->value[k].d / src2->value[k].d; + dst->u[k].d = src1->value.u[k].d / src2->value.u[k].d; break; case HLSL_TYPE_INT: - if (src2->value[k].i == 0) + if (src2->value.u[k].i == 0) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); return false; } - if (src1->value[k].i == INT_MIN && src2->value[k].i == -1) - dst->value[k].i = INT_MIN; + if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) + dst->u[k].i = INT_MIN; else - dst->value[k].i = src1->value[k].i / src2->value[k].i; + dst->u[k].i = src1->value.u[k].i / src2->value.u[k].i; break; case HLSL_TYPE_UINT: - 
if (src2->value[k].u == 0) + if (src2->value.u[k].u == 0) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); return false; } - dst->value[k].u = src1->value[k].u / src2->value[k].u; + dst->u[k].u = src1->value.u[k].u / src2->value.u[k].u; break; default: - FIXME("Fold division for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold division for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; } -static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k; assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type); - for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: - if (src2->value[k].i == 0) + if (src2->value.u[k].i == 0) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, - "Division by zero."); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); return false; } - if (src1->value[k].i == INT_MIN && src2->value[k].i == -1) - dst->value[k].i = 0; + if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) + dst->u[k].i = 0; else - dst->value[k].i = src1->value[k].i % src2->value[k].i; + dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; break; case HLSL_TYPE_UINT: - if (src2->value[k].u == 0) + if (src2->value.u[k].u == 0) { - hlsl_error(ctx, &dst->node.loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, 
- "Division by zero."); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); return false; } - dst->value[k].u = src1->value[k].u % src2->value[k].u; + dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; break; default: - FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; } -static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k; assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type); - for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: - dst->value[k].i = max(src1->value[k].i, src2->value[k].i); + dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); break; case HLSL_TYPE_UINT: - dst->value[k].u = max(src1->value[k].u, src2->value[k].u); + dst->u[k].u = max(src1->value.u[k].u, src2->value.u[k].u); break; default: - FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; } -static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum 
hlsl_base_type type = dst_type->base_type; unsigned int k; assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type); - for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: - dst->value[k].i = min(src1->value[k].i, src2->value[k].i); + dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); break; case HLSL_TYPE_UINT: - dst->value[k].u = min(src1->value[k].u, src2->value[k].u); + dst->u[k].u = min(src1->value.u[k].u, src2->value.u[k].u); break; default: - FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; } -static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k; assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type); - for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u ^ src2->value[k].u; + dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; break; default: - FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; } -static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value 
*dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k; assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type); - for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u & src2->value[k].u; + dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; break; default: - FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; } -static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, - struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2) +static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst->node.data_type->base_type; + enum hlsl_base_type type = dst_type->base_type; unsigned int k; assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type); - for (k = 0; k < dst->node.data_type->dimx; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->value[k].u = src1->value[k].u | src2->value[k].u; + dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; break; default: - FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type)); + FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } @@ -501,7 +544,9 @@ static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst, bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void 
*context) { - struct hlsl_ir_constant *arg1, *arg2 = NULL, *res; + struct hlsl_ir_constant *arg1, *arg2 = NULL; + struct hlsl_constant_value res = {0}; + struct hlsl_ir_node *res_node; struct hlsl_ir_expr *expr; unsigned int i; bool success; @@ -512,7 +557,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, if (!expr->operands[0].node) return false; - if (instr->data_type->type > HLSL_CLASS_VECTOR) + if (instr->data_type->class > HLSL_CLASS_VECTOR) return false; for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) @@ -521,64 +566,65 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, { if (expr->operands[i].node->type != HLSL_IR_CONSTANT) return false; - assert(expr->operands[i].node->data_type->type <= HLSL_CLASS_VECTOR); + assert(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR); } } arg1 = hlsl_ir_constant(expr->operands[0].node); if (expr->operands[1].node) arg2 = hlsl_ir_constant(expr->operands[1].node); - if (!(res = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) - return false; - switch (expr->op) { + case HLSL_OP1_ABS: + success = fold_abs(ctx, &res, instr->data_type, arg1); + break; + case HLSL_OP1_CAST: - success = fold_cast(ctx, res, arg1); + success = fold_cast(ctx, &res, instr->data_type, arg1); break; case HLSL_OP1_NEG: - success = fold_neg(ctx, res, arg1); + success = fold_neg(ctx, &res, instr->data_type, arg1); break; case HLSL_OP2_ADD: - success = fold_add(ctx, res, arg1, arg2); + success = fold_add(ctx, &res, instr->data_type, arg1, arg2); break; case HLSL_OP2_MUL: - success = fold_mul(ctx, res, arg1, arg2); + success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); break; case HLSL_OP2_NEQUAL: - success = fold_nequal(ctx, res, arg1, arg2); + success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); break; case HLSL_OP2_DIV: - success = fold_div(ctx, res, arg1, arg2); + success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break; case 
HLSL_OP2_MOD: - success = fold_mod(ctx, res, arg1, arg2); + success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break; case HLSL_OP2_MAX: - success = fold_max(ctx, res, arg1, arg2); + success = fold_max(ctx, &res, instr->data_type, arg1, arg2); break; case HLSL_OP2_MIN: - success = fold_min(ctx, res, arg1, arg2); + success = fold_min(ctx, &res, instr->data_type, arg1, arg2); break; case HLSL_OP2_BIT_XOR: - success = fold_bit_xor(ctx, res, arg1, arg2); + success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); break; case HLSL_OP2_BIT_AND: - success = fold_bit_and(ctx, res, arg1, arg2); + success = fold_bit_and(ctx, &res, instr->data_type, arg1, arg2); break; case HLSL_OP2_BIT_OR: - success = fold_bit_or(ctx, res, arg1, arg2); + success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); break; default: @@ -589,20 +635,20 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, if (success) { - list_add_before(&expr->node.entry, &res->node.entry); - hlsl_replace_node(&expr->node, &res->node); - } - else - { - vkd3d_free(res); + if (!(res_node = hlsl_new_constant(ctx, instr->data_type, &res, &instr->loc))) + return false; + list_add_before(&expr->node.entry, &res_node->entry); + hlsl_replace_node(&expr->node, res_node); } return success; } bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - struct hlsl_ir_constant *value, *res; + struct hlsl_constant_value value; struct hlsl_ir_swizzle *swizzle; + struct hlsl_ir_constant *src; + struct hlsl_ir_node *dst; unsigned int i; if (instr->type != HLSL_IR_SWIZZLE) @@ -610,15 +656,15 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst swizzle = hlsl_ir_swizzle(instr); if (swizzle->val.node->type != HLSL_IR_CONSTANT) return false; - value = hlsl_ir_constant(swizzle->val.node); - - if (!(res = hlsl_new_constant(ctx, instr->data_type, &instr->loc))) - return false; + src = 
hlsl_ir_constant(swizzle->val.node); for (i = 0; i < swizzle->node.data_type->dimx; ++i) - res->value[i] = value->value[hlsl_swizzle_get_component(swizzle->swizzle, i)]; + value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->swizzle, i)]; + + if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc))) + return false; - list_add_before(&swizzle->node.entry, &res->node.entry); - hlsl_replace_node(&swizzle->node, &res->node); + list_add_before(&swizzle->node.entry, &dst->entry); + hlsl_replace_node(&swizzle->node, dst); return true; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c deleted file mode 100644 index 4a62d804ed6..00000000000 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm1.c +++ /dev/null @@ -1,980 +0,0 @@ -/* - * HLSL code generation for DXBC shader models 1-3 - * - * Copyright 2019-2020 Zebediah Figura for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#include "hlsl.h" -#include - -bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) -{ - unsigned int i; - - static const struct - { - const char *semantic; - bool output; - enum vkd3d_shader_type shader_type; - unsigned int major_version; - D3DSHADER_PARAM_REGISTER_TYPE type; - DWORD offset; - } - register_table[] = - { - {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, - {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, - {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, - - {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, - {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, - {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"texcoord", true, 
VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) - { - if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) - && output == register_table[i].output - && ctx->profile->type == register_table[i].shader_type - && ctx->profile->major_version == register_table[i].major_version) - { - *type = register_table[i].type; - if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) - *reg = register_table[i].offset; - else - *reg = semantic->index; - return true; - } - } - - return false; -} - -bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) -{ - static const struct - { - const char *name; - D3DDECLUSAGE usage; - } - semantics[] = - { - {"binormal", D3DDECLUSAGE_BINORMAL}, - {"blendindices", D3DDECLUSAGE_BLENDINDICES}, - {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, - {"color", D3DDECLUSAGE_COLOR}, - {"depth", D3DDECLUSAGE_DEPTH}, - {"fog", D3DDECLUSAGE_FOG}, - {"normal", D3DDECLUSAGE_NORMAL}, - {"position", D3DDECLUSAGE_POSITION}, - {"positiont", D3DDECLUSAGE_POSITIONT}, - {"psize", D3DDECLUSAGE_PSIZE}, - {"sample", D3DDECLUSAGE_SAMPLE}, - {"sv_depth", D3DDECLUSAGE_DEPTH}, - {"sv_position", D3DDECLUSAGE_POSITION}, - {"sv_target", D3DDECLUSAGE_COLOR}, - {"tangent", D3DDECLUSAGE_TANGENT}, - {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, - {"texcoord", D3DDECLUSAGE_TEXCOORD}, - }; - - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { - if 
(!ascii_strcasecmp(semantic->name, semantics[i].name)) - { - *usage = semantics[i].usage; - *usage_idx = semantic->index; - return true; - } - } - - return false; -} - -static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) -{ - if (type == VKD3D_SHADER_TYPE_VERTEX) - return D3DVS_VERSION(major, minor); - else - return D3DPS_VERSION(major, minor); -} - -static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) -{ - switch (type->type) - { - case HLSL_CLASS_ARRAY: - return sm1_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3DXPC_MATRIX_COLUMNS; - else - return D3DXPC_MATRIX_ROWS; - case HLSL_CLASS_OBJECT: - return D3DXPC_OBJECT; - case HLSL_CLASS_SCALAR: - return D3DXPC_SCALAR; - case HLSL_CLASS_STRUCT: - return D3DXPC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3DXPC_VECTOR; - default: - ERR("Invalid class %#x.\n", type->type); - vkd3d_unreachable(); - } -} - -static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - return D3DXPT_BOOL; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3DXPT_FLOAT; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return D3DXPT_INT; - case HLSL_TYPE_PIXELSHADER: - return D3DXPT_PIXELSHADER; - case HLSL_TYPE_SAMPLER: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_SAMPLER1D; - case HLSL_SAMPLER_DIM_2D: - return D3DXPT_SAMPLER2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_SAMPLER3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_SAMPLERCUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_SAMPLER; - default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_STRING: - return D3DXPT_STRING; - case HLSL_TYPE_TEXTURE: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3DXPT_TEXTURE1D; 
- case HLSL_SAMPLER_DIM_2D: - return D3DXPT_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3DXPT_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3DXPT_TEXTURECUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3DXPT_TEXTURE; - default: - ERR("Invalid dimension %#x.\n", type->sampler_dim); - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_VERTEXSHADER: - return D3DXPT_VERTEXSHADER; - case HLSL_TYPE_VOID: - return D3DXPT_VOID; - default: - vkd3d_unreachable(); - } -} - -static const struct hlsl_type *get_array_type(const struct hlsl_type *type) -{ - if (type->type == HLSL_CLASS_ARRAY) - return get_array_type(type->e.array.type); - return type; -} - -static unsigned int get_array_size(const struct hlsl_type *type) -{ - if (type->type == HLSL_CLASS_ARRAY) - return get_array_size(type->e.array.type) * type->e.array.elements_count; - return 1; -} - -static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) -{ - const struct hlsl_type *array_type = get_array_type(type); - unsigned int array_size = get_array_size(type); - unsigned int field_count = 0; - size_t fields_offset = 0; - size_t i; - - if (type->bytecode_offset) - return; - - if (array_type->type == HLSL_CLASS_STRUCT) - { - field_count = array_type->e.record.field_count; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - field->name_bytecode_offset = put_string(buffer, field->name); - write_sm1_type(buffer, field->type, ctab_start); - } - - fields_offset = bytecode_get_size(buffer) - ctab_start; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - put_u32(buffer, field->name_bytecode_offset - ctab_start); - put_u32(buffer, field->type->bytecode_offset - ctab_start); - } - } - - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); - put_u32(buffer, vkd3d_make_u32(type->dimy, 
type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); -} - -static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) -{ - struct hlsl_ir_var *var; - - list_remove(&to_sort->extern_entry); - - LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) - { - if (strcmp(to_sort->name, var->name) < 0) - { - list_add_before(&var->extern_entry, &to_sort->extern_entry); - return; - } - } - - list_add_tail(sorted, &to_sort->extern_entry); -} - -static void sm1_sort_externs(struct hlsl_ctx *ctx) -{ - struct list sorted = LIST_INIT(sorted); - struct hlsl_ir_var *var, *next; - - LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - sm1_sort_extern(&sorted, var); - list_move_tail(&ctx->extern_vars, &sorted); -} - -static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - struct hlsl_ir_function_decl *entry_func) -{ - size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; - unsigned int uniform_count = 0; - struct hlsl_ir_var *var; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); - - if (!var->semantic.name && var->regs[regset].allocated) - { - ++uniform_count; - - if (var->is_param && var->is_uniform) - { - struct vkd3d_string_buffer *name; - - if (!(name = hlsl_get_string_buffer(ctx))) - { - buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; - return; - } - vkd3d_string_buffer_printf(name, "$%s", var->name); - vkd3d_free((char *)var->name); - var->name = hlsl_strdup(ctx, name->buffer); - hlsl_release_string_buffer(ctx, name); - } - } - } - - sm1_sort_externs(ctx); - - size_offset = put_u32(buffer, 0); - ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); - - ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); - creator_offset = put_u32(buffer, 0); - put_u32(buffer, 
sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - put_u32(buffer, uniform_count); - put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ - put_u32(buffer, 0); /* FIXME: flags */ - put_u32(buffer, 0); /* FIXME: target string */ - - vars_start = bytecode_get_size(buffer); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); - - if (!var->semantic.name && var->regs[regset].allocated) - { - put_u32(buffer, 0); /* name */ - if (var->data_type->type == HLSL_CLASS_OBJECT - && (var->data_type->base_type == HLSL_TYPE_SAMPLER - || var->data_type->base_type == HLSL_TYPE_TEXTURE)) - { - assert(regset == HLSL_REGSET_SAMPLERS); - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); - put_u32(buffer, 1); - } - else - { - assert(regset == HLSL_REGSET_NUMERIC); - put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); - put_u32(buffer, var->data_type->reg_size[regset] / 4); - } - put_u32(buffer, 0); /* type */ - put_u32(buffer, 0); /* FIXME: default value */ - } - } - - uniform_count = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); - - if (!var->semantic.name && var->regs[regset].allocated) - { - size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); - size_t name_offset; - - name_offset = put_string(buffer, var->name); - set_u32(buffer, var_offset, name_offset - ctab_start); - - write_sm1_type(buffer, var->data_type, ctab_start); - set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); - ++uniform_count; - } - } - - offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(buffer, creator_offset, offset - ctab_start); - - ctab_end = bytecode_get_size(buffer); - set_u32(buffer, size_offset, 
vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); -} - -static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) -{ - return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) - | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); -} - -struct sm1_instruction -{ - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; - - struct sm1_dst_register - { - D3DSHADER_PARAM_REGISTER_TYPE type; - D3DSHADER_PARAM_DSTMOD_TYPE mod; - unsigned int writemask; - uint32_t reg; - } dst; - - struct sm1_src_register - { - D3DSHADER_PARAM_REGISTER_TYPE type; - D3DSHADER_PARAM_SRCMOD_TYPE mod; - unsigned int swizzle; - uint32_t reg; - } srcs[3]; - unsigned int src_count; - - unsigned int has_dst; -}; - -static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) -{ - assert(reg->writemask); - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); -} - -static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, - const struct sm1_src_register *reg) -{ - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); -} - -static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct sm1_instruction *instr) -{ - uint32_t token = instr->opcode; - unsigned int i; - - if (ctx->profile->major_version > 1) - token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - if (instr->has_dst) - write_sm1_dst_register(buffer, &instr->dst); - - for (i = 0; i < instr->src_count; ++i) - write_sm1_src_register(buffer, &instr->srcs[i]); -}; - -static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask) -{ - src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); -} - -static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct 
hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, - const struct hlsl_reg *src3) -{ - struct sm1_instruction instr = - { - .opcode = D3DSIO_DP2ADD, - - .dst.type = D3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .srcs[2].type = D3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, - }; - - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = D3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = D3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - 
.srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = D3DSPR_TEMP, - .dst.mod = dst_mod, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), - .srcs[0].reg = src->id, - .srcs[0].mod = src_mod, - .src_count = 1, - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); -} - -static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -{ - unsigned int i, x; - - for (i = 0; i < ctx->constant_defs.count; ++i) - { - uint32_t token = D3DSIO_DEF; - const struct sm1_dst_register reg = - { - .type = D3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = i, - }; - - if (ctx->profile->major_version > 1) - token |= 5 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - write_sm1_dst_register(buffer, &reg); - for (x = 0; x < 4; ++x) - put_f32(buffer, ctx->constant_defs.values[i].f[x]); - } -} - -static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_var *var, bool output) -{ - struct sm1_dst_register reg = {0}; - uint32_t token, usage_idx; - D3DDECLUSAGE usage; - bool ret; - - if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &reg.type, &reg.reg)) - { - usage = 0; - usage_idx = 0; - } - else - { - ret = 
hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); - assert(ret); - reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; - reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; - } - - token = D3DSIO_DCL; - if (ctx->profile->major_version > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - - token = (1u << 31); - token |= usage << D3DSP_DCL_USAGE_SHIFT; - token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; - put_u32(buffer, token); - - reg.writemask = (1 << var->data_type->dimx) - 1; - write_sm1_dst_register(buffer, &reg); -} - -static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -{ - bool write_in = false, write_out = false; - struct hlsl_ir_var *var; - - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - write_in = true; - else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) - write_in = write_out = true; - else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) - write_in = true; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (write_in && var->is_input_semantic) - write_sm1_semantic_dcl(ctx, buffer, var, false); - if (write_out && var->is_output_semantic) - write_sm1_semantic_dcl(ctx, buffer, var, true); - } -} - -static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = D3DSPR_CONST, - .srcs[0].reg = constant->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), - .src_count = 1, - }; - - assert(instr->reg.allocated); - assert(constant->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], 
sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) -{ - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - unsigned int i; - - for (i = 0; i < instr->data_type->dimx; ++i) - { - struct hlsl_reg src = arg1->reg, dst = instr->reg; - - src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); - dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); - write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); - } -} - -static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -{ - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - struct hlsl_ir_node *arg2 = expr->operands[1].node; - struct hlsl_ir_node *arg3 = expr->operands[2].node; - - assert(instr->reg.allocated); - - if (instr->data_type->base_type != HLSL_TYPE_FLOAT) - { - /* These need to be lowered. 
*/ - hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); - return; - } - - switch (expr->op) - { - case HLSL_OP1_ABS: - write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_EXP2: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); - break; - - case HLSL_OP1_NEG: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); - break; - - case HLSL_OP1_SAT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); - break; - - case HLSL_OP1_RCP: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); - break; - - case HLSL_OP1_RSQ: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); - break; - - case HLSL_OP2_ADD: - write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MAX: - write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MIN: - write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MUL: - write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP1_FRACT: - write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); - break; - - case HLSL_OP2_DOT: - switch (arg1->data_type->dimx) - { - case 4: - write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case 3: - write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_OP3_DP2ADD: - write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - default: - hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); - break; - } -} - -static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, 
const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_load *load = hlsl_ir_load(instr); - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), - .src_count = 1, - }; - - assert(instr->reg.allocated); - - if (load->src.var->is_uniform) - { - assert(reg.allocated); - sm1_instr.srcs[0].type = D3DSPR_CONST; - } - else if (load->src.var->is_input_semantic) - { - if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, - false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) - { - assert(reg.allocated); - sm1_instr.srcs[0].type = D3DSPR_INPUT; - sm1_instr.srcs[0].reg = reg.id; - } - else - sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1); - } - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_store *store = hlsl_ir_store(instr); - const struct hlsl_ir_node *rhs = store->rhs.node; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = reg.id, - .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = rhs->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), - .src_count = 1, - }; - - if (store->lhs.var->data_type->type == HLSL_CLASS_MATRIX) - { - FIXME("Matrix writemasks need to be lowered.\n"); - return; - } - - if 
(store->lhs.var->is_output_semantic) - { - if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, - true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) - { - assert(reg.allocated); - sm1_instr.dst.type = D3DSPR_OUTPUT; - sm1_instr.dst.reg = reg.id; - } - else - sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; - } - else - assert(reg.allocated); - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); - const struct hlsl_ir_node *val = swizzle->val.node; - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - - .dst.type = D3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = val->reg.id, - .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), - swizzle->swizzle, instr->data_type->dimx), - .src_count = 1, - }; - - assert(instr->reg.allocated); - assert(val->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); -} - -static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_function_decl *entry_func) -{ - const struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->type == HLSL_CLASS_MATRIX) - { - /* These need to be lowered. 
*/ - hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression."); - continue; - } - else if (instr->data_type->type == HLSL_CLASS_OBJECT) - { - hlsl_fixme(ctx, &instr->loc, "Object copy."); - break; - } - - assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR); - } - - switch (instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); - - case HLSL_IR_CONSTANT: - write_sm1_constant(ctx, buffer, instr); - break; - - case HLSL_IR_EXPR: - write_sm1_expr(ctx, buffer, instr); - break; - - case HLSL_IR_LOAD: - write_sm1_load(ctx, buffer, instr); - break; - - case HLSL_IR_STORE: - write_sm1_store(ctx, buffer, instr); - break; - - case HLSL_IR_SWIZZLE: - write_sm1_swizzle(ctx, buffer, instr); - break; - - default: - hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } -} - -int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -{ - struct vkd3d_bytecode_buffer buffer = {0}; - int ret; - - put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - - write_sm1_uniforms(ctx, &buffer, entry_func); - - write_sm1_constant_defs(ctx, &buffer); - write_sm1_semantic_dcls(ctx, &buffer); - write_sm1_instructions(ctx, &buffer, entry_func); - - put_u32(&buffer, D3DSIO_END); - - if (!(ret = buffer.status)) - { - out->code = buffer.data; - out->size = buffer.size; - } - return ret; -} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c deleted file mode 100644 index 553a75818e7..00000000000 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_sm4.c +++ /dev/null @@ -1,2531 +0,0 @@ -/* - * HLSL code generation for DXBC shader models 4-5 - * - * Copyright 2019-2020 Zebediah Figura for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free 
Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#include "hlsl.h" -#include -#include "d3dcommon.h" -#include "sm4.h" - -static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); - -static bool type_is_integer(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - return true; - - default: - return false; - } -} - -bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) -{ - unsigned int i; - - static const struct - { - const char *semantic; - bool output; - enum vkd3d_shader_type shader_type; - enum vkd3d_sm4_swizzle_type swizzle_type; - enum vkd3d_sm4_register_type type; - bool has_idx; - } - register_table[] = - { - {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, - - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, - - /* Put sv_target in this table, instead of letting it fall through to - * 
default varying allocation, so that the register index matches the - * usage index. */ - {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) - { - if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) - && output == register_table[i].output - && ctx->profile->type == register_table[i].shader_type) - { - *type = register_table[i].type; - if (swizzle_type) - *swizzle_type = register_table[i].swizzle_type; - *has_idx = register_table[i].has_idx; - return true; - } - } - - return false; -} - -bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3D_NAME *usage) -{ - unsigned int i; - - static const struct - { - const char *name; - bool output; - enum vkd3d_shader_type shader_type; - D3DDECLUSAGE usage; - } - semantics[] = - { - {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - - {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, - - {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, - {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, - - {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, - - {"color", true, 
VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - - {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, - {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, - - {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, - }; - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { - if (!ascii_strcasecmp(semantic->name, semantics[i].name) - && output == semantics[i].output - && ctx->profile->type == semantics[i].shader_type - && !ascii_strncasecmp(semantic->name, "sv_", 3)) - { - *usage = semantics[i].usage; - return true; - } - } - - if (!ascii_strncasecmp(semantic->name, "sv_", 3)) - return false; - - *usage = D3D_NAME_UNDEFINED; - return true; -} - -static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) -{ - struct vkd3d_bytecode_buffer buffer = {0}; - struct vkd3d_string_buffer *string; - const struct hlsl_ir_var *var; - size_t count_position; - unsigned int i; - bool ret; - - count_position = put_u32(&buffer, 0); - put_u32(&buffer, 8); /* unknown */ - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; - enum vkd3d_sm4_register_type type; - uint32_t usage_idx, reg_idx; - D3D_NAME usage; - bool has_idx; - - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; - - ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - assert(ret); - if (usage == ~0u) - continue; - usage_idx = var->semantic.index; - - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) - { - reg_idx = has_idx ? 
var->semantic.index : ~0u; - } - else - { - assert(var->regs[HLSL_REGSET_NUMERIC].allocated); - type = VKD3D_SM4_RT_INPUT; - reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; - } - - use_mask = width; /* FIXME: accurately report use mask */ - if (output) - use_mask = 0xf ^ use_mask; - - /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). */ - if (usage >= 64) - usage = 0; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, usage_idx); - put_u32(&buffer, usage); - switch (var->data_type->base_type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); - break; - - case HLSL_TYPE_INT: - put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); - break; - - default: - if ((string = hlsl_type_to_string(ctx, var->data_type))) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid data type %s for semantic variable %s.", string->buffer, var->name); - hlsl_release_string_buffer(ctx, string); - put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); - } - put_u32(&buffer, reg_idx); - put_u32(&buffer, vkd3d_make_u16(width, use_mask)); - } - - i = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - const char *semantic = var->semantic.name; - size_t string_offset; - D3D_NAME usage; - - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; - - hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - if (usage == ~0u) - continue; - - if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) - string_offset = put_string(&buffer, "SV_Target"); - else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) - string_offset = put_string(&buffer, "SV_Depth"); - else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) - string_offset = put_string(&buffer, "SV_Position"); - else - 
string_offset = put_string(&buffer, semantic); - set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); - } - - set_u32(&buffer, count_position, i); - - dxbc_writer_add_section(dxbc, output ? TAG_OSGN : TAG_ISGN, buffer.data, buffer.size); -} - -static const struct hlsl_type *get_array_type(const struct hlsl_type *type) -{ - if (type->type == HLSL_CLASS_ARRAY) - return get_array_type(type->e.array.type); - return type; -} - -static unsigned int get_array_size(const struct hlsl_type *type) -{ - if (type->type == HLSL_CLASS_ARRAY) - return get_array_size(type->e.array.type) * type->e.array.elements_count; - return 1; -} - -static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) -{ - switch (type->type) - { - case HLSL_CLASS_ARRAY: - return sm4_class(type->e.array.type); - case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3D_SVC_MATRIX_COLUMNS; - else - return D3D_SVC_MATRIX_ROWS; - case HLSL_CLASS_OBJECT: - return D3D_SVC_OBJECT; - case HLSL_CLASS_SCALAR: - return D3D_SVC_SCALAR; - case HLSL_CLASS_STRUCT: - return D3D_SVC_STRUCT; - case HLSL_CLASS_VECTOR: - return D3D_SVC_VECTOR; - default: - ERR("Invalid class %#x.\n", type->type); - vkd3d_unreachable(); - } -} - -static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - return D3D_SVT_BOOL; - case HLSL_TYPE_DOUBLE: - return D3D_SVT_DOUBLE; - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3D_SVT_FLOAT; - case HLSL_TYPE_INT: - return D3D_SVT_INT; - case HLSL_TYPE_PIXELSHADER: - return D3D_SVT_PIXELSHADER; - case HLSL_TYPE_SAMPLER: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SVT_SAMPLER1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SVT_SAMPLER2D; - case HLSL_SAMPLER_DIM_3D: - return D3D_SVT_SAMPLER3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3D_SVT_SAMPLERCUBE; - case 
HLSL_SAMPLER_DIM_GENERIC: - return D3D_SVT_SAMPLER; - default: - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_STRING: - return D3D_SVT_STRING; - case HLSL_TYPE_TEXTURE: - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SVT_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SVT_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3D_SVT_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3D_SVT_TEXTURECUBE; - case HLSL_SAMPLER_DIM_GENERIC: - return D3D_SVT_TEXTURE; - default: - vkd3d_unreachable(); - } - break; - case HLSL_TYPE_UINT: - return D3D_SVT_UINT; - case HLSL_TYPE_VERTEXSHADER: - return D3D_SVT_VERTEXSHADER; - case HLSL_TYPE_VOID: - return D3D_SVT_VOID; - default: - vkd3d_unreachable(); - } -} - -static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type) -{ - const struct hlsl_type *array_type = get_array_type(type); - const char *name = array_type->name ? array_type->name : ""; - const struct hlsl_profile_info *profile = ctx->profile; - unsigned int field_count = 0, array_size = 0; - size_t fields_offset = 0, name_offset = 0; - size_t i; - - if (type->bytecode_offset) - return; - - if (profile->major_version >= 5) - name_offset = put_string(buffer, name); - - if (type->type == HLSL_CLASS_ARRAY) - array_size = get_array_size(type); - - if (array_type->type == HLSL_CLASS_STRUCT) - { - field_count = array_type->e.record.field_count; - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - field->name_bytecode_offset = put_string(buffer, field->name); - write_sm4_type(ctx, buffer, field->type); - } - - fields_offset = bytecode_get_size(buffer); - - for (i = 0; i < field_count; ++i) - { - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - put_u32(buffer, field->name_bytecode_offset); - put_u32(buffer, field->type->bytecode_offset); - put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); - } - } - - 
type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type))); - put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); - - if (profile->major_version >= 5) - { - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, 0); /* FIXME: unknown */ - put_u32(buffer, name_offset); - } -} - -static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) -{ - switch (type->base_type) - { - case HLSL_TYPE_SAMPLER: - return D3D_SIT_SAMPLER; - case HLSL_TYPE_TEXTURE: - return D3D_SIT_TEXTURE; - case HLSL_TYPE_UAV: - return D3D_SIT_UAV_RWTYPED; - default: - vkd3d_unreachable(); - } -} - -static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) -{ - switch (type->e.resource_format->base_type) - { - case HLSL_TYPE_DOUBLE: - return D3D_RETURN_TYPE_DOUBLE; - - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - return D3D_RETURN_TYPE_FLOAT; - - case HLSL_TYPE_INT: - return D3D_RETURN_TYPE_SINT; - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - return D3D_RETURN_TYPE_UINT; - - default: - vkd3d_unreachable(); - } -} - -static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) -{ - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return D3D_SRV_DIMENSION_TEXTURE1D; - case HLSL_SAMPLER_DIM_2D: - return D3D_SRV_DIMENSION_TEXTURE2D; - case HLSL_SAMPLER_DIM_3D: - return D3D_SRV_DIMENSION_TEXTURE3D; - case HLSL_SAMPLER_DIM_CUBE: - return D3D_SRV_DIMENSION_TEXTURECUBE; - case HLSL_SAMPLER_DIM_1DARRAY: - return D3D_SRV_DIMENSION_TEXTURE1DARRAY; - case HLSL_SAMPLER_DIM_2DARRAY: - return D3D_SRV_DIMENSION_TEXTURE2DARRAY; - case HLSL_SAMPLER_DIM_2DMS: - return D3D_SRV_DIMENSION_TEXTURE2DMS; - case HLSL_SAMPLER_DIM_2DMSARRAY: - return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY; - case 
HLSL_SAMPLER_DIM_CUBEARRAY: - return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; - default: - vkd3d_unreachable(); - } -} - -static int sm4_compare_extern_resources(const void *a, const void *b) -{ - const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; - const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; - enum hlsl_regset aa_regset, bb_regset; - - aa_regset = hlsl_type_get_regset(aa->data_type); - bb_regset = hlsl_type_get_regset(bb->data_type); - - if (aa_regset != bb_regset) - return aa_regset - bb_regset; - - return aa->regs[aa_regset].id - bb->regs[bb_regset].id; -} - -static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) -{ - const struct hlsl_ir_var **extern_resources = NULL; - const struct hlsl_ir_var *var; - enum hlsl_regset regset; - size_t capacity = 0; - - *count = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (!hlsl_type_is_resource(var->data_type)) - continue; - regset = hlsl_type_get_regset(var->data_type); - if (!var->regs[regset].allocated) - continue; - - if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, - sizeof(*extern_resources)))) - { - *count = 0; - return NULL; - } - - extern_resources[*count] = var; - ++*count; - } - - qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); - return extern_resources; -} - -static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) -{ - unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; - size_t cbuffers_offset, resources_offset, creator_offset, string_offset; - size_t cbuffer_position, resource_position, creator_position; - const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; - struct vkd3d_bytecode_buffer buffer = {0}; - const struct hlsl_buffer *cbuffer; - const struct hlsl_ir_var *var; - - static const uint16_t target_types[] = - { - 
0xffff, /* PIXEL */ - 0xfffe, /* VERTEX */ - 0x4753, /* GEOMETRY */ - 0x4853, /* HULL */ - 0x4453, /* DOMAIN */ - 0x4353, /* COMPUTE */ - }; - - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - - resource_count += extern_resources_count; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) - { - ++cbuffer_count; - ++resource_count; - } - } - - put_u32(&buffer, cbuffer_count); - cbuffer_position = put_u32(&buffer, 0); - put_u32(&buffer, resource_count); - resource_position = put_u32(&buffer, 0); - put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), - target_types[profile->type])); - put_u32(&buffer, 0); /* FIXME: compilation flags */ - creator_position = put_u32(&buffer, 0); - - if (profile->major_version >= 5) - { - put_u32(&buffer, TAG_RD11); - put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ - put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ - put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ - put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ - put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ - put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ - put_u32(&buffer, 0); /* unknown; possibly a null terminator */ - } - - /* Bound resources. 
*/ - - resources_offset = bytecode_get_size(&buffer); - set_u32(&buffer, resource_position, resources_offset); - - for (i = 0; i < extern_resources_count; ++i) - { - enum hlsl_regset regset; - uint32_t flags = 0; - - var = extern_resources[i]; - regset = hlsl_type_get_regset(var->data_type); - - if (var->reg_reservation.type) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, sm4_resource_type(var->data_type)); - if (regset == HLSL_REGSET_SAMPLERS) - { - put_u32(&buffer, 0); - put_u32(&buffer, 0); - put_u32(&buffer, 0); - } - else - { - put_u32(&buffer, sm4_resource_format(var->data_type)); - put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); - put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (var->data_type->e.resource_format->dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; - } - put_u32(&buffer, var->regs[regset].id); - put_u32(&buffer, 1); /* bind count */ - put_u32(&buffer, flags); - } - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - uint32_t flags = 0; - - if (!cbuffer->reg.allocated) - continue; - - if (cbuffer->reservation.type) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); - put_u32(&buffer, 0); /* return type */ - put_u32(&buffer, 0); /* dimension */ - put_u32(&buffer, 0); /* multisample count */ - put_u32(&buffer, cbuffer->reg.id); /* bind point */ - put_u32(&buffer, 1); /* bind count */ - put_u32(&buffer, flags); /* flags */ - } - - for (i = 0; i < extern_resources_count; ++i) - { - var = extern_resources[i]; - - string_offset = put_string(&buffer, var->name); - set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); - } - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (!cbuffer->reg.allocated) - continue; - - string_offset = put_string(&buffer, cbuffer->name); - set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); - } - - /* Buffers. */ - - cbuffers_offset = bytecode_get_size(&buffer); - set_u32(&buffer, cbuffer_position, cbuffers_offset); - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - unsigned int var_count = 0; - - if (!cbuffer->reg.allocated) - continue; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->buffer == cbuffer) - ++var_count; - } - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, var_count); - put_u32(&buffer, 0); /* variable offset */ - put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); - put_u32(&buffer, 0); /* FIXME: flags */ - put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_CT_CBUFFER : D3D_CT_TBUFFER); - } - - i = 0; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (!cbuffer->reg.allocated) - continue; - - string_offset = put_string(&buffer, cbuffer->name); - set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); - } - - i = 0; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - size_t vars_start = bytecode_get_size(&buffer); - - if (!cbuffer->reg.allocated) - continue; - - set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->buffer == cbuffer) - { - uint32_t flags = 0; - - if (var->last_read) - flags |= D3D_SVF_USED; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, var->buffer_offset * sizeof(float)); - put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); - put_u32(&buffer, flags); - put_u32(&buffer, 0); /* type */ - put_u32(&buffer, 0); /* FIXME: default value */ - - if (profile->major_version >= 5) - { - put_u32(&buffer, 0); /* texture start */ - put_u32(&buffer, 0); /* texture count */ - put_u32(&buffer, 0); /* sampler start */ - put_u32(&buffer, 0); /* sampler count */ - } - } - } - - j = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform && var->buffer == cbuffer) - { - const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); - size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); - size_t string_offset = put_string(&buffer, var->name); - - set_u32(&buffer, var_offset, string_offset); - write_sm4_type(ctx, &buffer, var->data_type); - set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); - ++j; - } - } - } - - creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(&buffer, creator_position, creator_offset); - - dxbc_writer_add_section(dxbc, TAG_RDEF, buffer.data, buffer.size); - - vkd3d_free(extern_resources); -} - -static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) -{ - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: - return VKD3D_SM4_RESOURCE_TEXTURE_1D; - case HLSL_SAMPLER_DIM_2D: - return VKD3D_SM4_RESOURCE_TEXTURE_2D; - case HLSL_SAMPLER_DIM_3D: - return VKD3D_SM4_RESOURCE_TEXTURE_3D; - case HLSL_SAMPLER_DIM_CUBE: - return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; - case HLSL_SAMPLER_DIM_1DARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; - case HLSL_SAMPLER_DIM_2DARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; - case HLSL_SAMPLER_DIM_2DMS: - return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; - case HLSL_SAMPLER_DIM_2DMSARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; - case HLSL_SAMPLER_DIM_CUBEARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; - default: - vkd3d_unreachable(); - } -} - -struct sm4_instruction_modifier -{ - enum vkd3d_sm4_instruction_modifier type; - - union - { - struct - { - int u, v, w; - } aoffimmi; - } u; -}; - -static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod) -{ - uint32_t word = 0; - - word |= VKD3D_SM4_MODIFIER_MASK & imod->type; - - switch (imod->type) - { - case VKD3D_SM4_MODIFIER_AOFFIMMI: - assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); - assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); - assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); - word |= 
((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; - word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; - word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; - break; - - default: - vkd3d_unreachable(); - } - - return word; -} - -struct sm4_register -{ - enum vkd3d_sm4_register_type type; - uint32_t idx[2]; - unsigned int idx_count; - enum vkd3d_sm4_dimension dim; - uint32_t immconst_uint[4]; - unsigned int mod; -}; - -struct sm4_instruction -{ - enum vkd3d_sm4_opcode opcode; - - struct sm4_instruction_modifier modifiers[1]; - unsigned int modifier_count; - - struct sm4_dst_register - { - struct sm4_register reg; - unsigned int writemask; - } dsts[2]; - unsigned int dst_count; - - struct sm4_src_register - { - struct sm4_register reg; - enum vkd3d_sm4_swizzle_type swizzle_type; - unsigned int swizzle; - } srcs[4]; - unsigned int src_count; - - uint32_t idx[3]; - unsigned int idx_count; -}; - -static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, - unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, - const struct hlsl_deref *deref, const struct hlsl_type *data_type) -{ - const struct hlsl_ir_var *var = deref->var; - - if (var->is_uniform) - { - if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_TEXTURE) - { - reg->type = VKD3D_SM4_RT_RESOURCE; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_UAV) - { - reg->type = VKD3D_SM5_RT_UAV; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (data_type->type == 
HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_SAMPLER) - { - reg->type = VKD3D_SM4_RT_SAMPLER; - reg->dim = VKD3D_SM4_DIMENSION_NONE; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; - reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; - - assert(data_type->type <= HLSL_CLASS_VECTOR); - reg->type = VKD3D_SM4_RT_CONSTBUFFER; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->buffer->reg.id; - reg->idx[1] = offset / 4; - reg->idx_count = 2; - *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); - } - } - else if (var->is_input_semantic) - { - bool has_idx; - - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, &reg->type, swizzle_type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - - if (has_idx) - { - reg->idx[0] = var->semantic.index + offset / 4; - reg->idx_count = 1; - } - - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); - } - else - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - assert(hlsl_reg.allocated); - reg->type = VKD3D_SM4_RT_INPUT; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } - } - else if (var->is_output_semantic) - { - bool has_idx; - - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, &reg->type, swizzle_type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - - if (has_idx) - { - reg->idx[0] = var->semantic.index + offset / 4; - reg->idx_count = 1; - } - - if (reg->type == VKD3D_SM4_RT_DEPTHOUT) - reg->dim = VKD3D_SM4_DIMENSION_SCALAR; - else - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - *writemask = 
((1u << data_type->dimx) - 1) << (offset % 4); - } - else - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - assert(hlsl_reg.allocated); - reg->type = VKD3D_SM4_RT_OUTPUT; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - reg->idx[0] = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } - } - else - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - assert(hlsl_reg.allocated); - reg->type = VKD3D_SM4_RT_TEMP; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - if (swizzle_type) - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; - } -} - -static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, - const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) -{ - unsigned int writemask; - - sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); - if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) - src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); -} - -static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask, - enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) -{ - assert(instr->reg.allocated); - reg->type = VKD3D_SM4_RT_TEMP; - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = instr->reg.id; - reg->idx_count = 1; - *writemask = instr->reg.writemask; -} - -static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct hlsl_ir_node *instr) -{ - unsigned int swizzle_type; - - sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, instr); -} - -static void sm4_src_from_node(struct sm4_src_register *src, - const struct hlsl_ir_node *instr, unsigned int map_writemask) -{ - unsigned int writemask; - - sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr); - if (src->swizzle_type == 
VKD3D_SM4_SWIZZLE_VEC4) - src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); -} - -static uint32_t sm4_encode_register(const struct sm4_register *reg) -{ - return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) - | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) - | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); -} - -static uint32_t sm4_register_order(const struct sm4_register *reg) -{ - uint32_t order = 1; - if (reg->type == VKD3D_SM4_RT_IMMCONST) - order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 4 : 1; - order += reg->idx_count; - if (reg->mod) - ++order; - return order; -} - -static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) -{ - uint32_t token = instr->opcode; - unsigned int size = 1, i, j; - - size += instr->modifier_count; - for (i = 0; i < instr->dst_count; ++i) - size += sm4_register_order(&instr->dsts[i].reg); - for (i = 0; i < instr->src_count; ++i) - size += sm4_register_order(&instr->srcs[i].reg); - size += instr->idx_count; - - token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); - - if (instr->modifier_count > 0) - token |= VKD3D_SM4_INSTRUCTION_MODIFIER; - put_u32(buffer, token); - - for (i = 0; i < instr->modifier_count; ++i) - { - token = sm4_encode_instruction_modifier(&instr->modifiers[i]); - if (instr->modifier_count > i + 1) - token |= VKD3D_SM4_INSTRUCTION_MODIFIER; - put_u32(buffer, token); - } - - for (i = 0; i < instr->dst_count; ++i) - { - token = sm4_encode_register(&instr->dsts[i].reg); - if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) - token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; - put_u32(buffer, token); - - for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) - put_u32(buffer, instr->dsts[i].reg.idx[j]); - } - - for (i = 0; i < instr->src_count; ++i) - { - token = sm4_encode_register(&instr->srcs[i].reg); - token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; - token |= 
instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; - if (instr->srcs[i].reg.mod) - token |= VKD3D_SM4_EXTENDED_OPERAND; - put_u32(buffer, token); - - if (instr->srcs[i].reg.mod) - put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) - | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); - - for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) - put_u32(buffer, instr->srcs[i].reg.idx[j]); - - if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) - { - put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); - if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) - { - put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); - put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); - put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); - } - } - } - - for (j = 0; j < instr->idx_count; ++j) - put_u32(buffer, instr->idx[j]); -} - -static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - const struct hlsl_ir_node *texel_offset) -{ - struct sm4_instruction_modifier modif; - struct hlsl_ir_constant *offset; - - if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) - return false; - offset = hlsl_ir_constant(texel_offset); - - modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; - modif.u.aoffimmi.u = offset->value[0].i; - modif.u.aoffimmi.v = offset->value[1].i; - modif.u.aoffimmi.w = offset->value[2].i; - if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 - || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 - || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) - return false; - - instr->modifiers[instr->modifier_count++] = modif; - return true; -} - -static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) -{ - const struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, - - .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, - .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, - .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, - .srcs[0].reg.idx_count = 
2, - .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, - .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), - .src_count = 1, - }; - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_sampler(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -{ - const struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, - - .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, - .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - }; - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_texture(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -{ - bool uav = (var->data_type->base_type == HLSL_TYPE_UAV); - struct sm4_instruction instr = - { - .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE) - | (sm4_resource_dimension(var->data_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT), - - .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, - .dsts[0].reg.idx = {uav ? 
var->regs[HLSL_REGSET_UAVS].id : var->regs[HLSL_REGSET_TEXTURES].id}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - - .idx[0] = sm4_resource_format(var->data_type) * 0x1111, - .idx_count = 1, - }; - - if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) - { - instr.opcode |= var->data_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) -{ - const struct hlsl_profile_info *profile = ctx->profile; - const bool output = var->is_output_semantic; - D3D_NAME usage; - bool has_idx; - - struct sm4_instruction instr = - { - .dsts[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, - .dst_count = 1, - }; - - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) - { - if (has_idx) - { - instr.dsts[0].reg.idx[0] = var->semantic.index; - instr.dsts[0].reg.idx_count = 1; - } - else - { - instr.dsts[0].reg.idx_count = 0; - } - instr.dsts[0].writemask = (1 << var->data_type->dimx) - 1; - } - else - { - instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; - instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; - instr.dsts[0].reg.idx_count = 1; - instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; - } - - if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) - instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; - - hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - if (usage == ~0u) - usage = D3D_NAME_UNDEFINED; - - if (var->is_input_semantic) - { - switch (usage) - { - case D3D_NAME_UNDEFINED: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) - ? 
VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; - break; - - case D3D_NAME_INSTANCE_ID: - case D3D_NAME_PRIMITIVE_ID: - case D3D_NAME_VERTEX_ID: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; - break; - - default: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; - break; - } - - if (profile->type == VKD3D_SHADER_TYPE_PIXEL) - { - enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; - - if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) - mode = VKD3DSIM_CONSTANT; - - instr.opcode |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - } - } - else - { - if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) - instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; - else - instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; - } - - switch (usage) - { - case D3D_NAME_COVERAGE: - case D3D_NAME_DEPTH: - case D3D_NAME_DEPTH_GREATER_EQUAL: - case D3D_NAME_DEPTH_LESS_EQUAL: - case D3D_NAME_TARGET: - case D3D_NAME_UNDEFINED: - break; - - default: - instr.idx_count = 1; - instr.idx[0] = usage; - break; - } - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_TEMPS, - - .idx = {temp_count}, - .idx_count = 1, - }; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, - - .idx = {thread_count[0], thread_count[1], thread_count[2]}, - .idx_count = 3, - }; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_RET, - }; - - 
write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], src, instr.dsts[0].writemask); - instr.srcs[0].reg.mod = src_mod; - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, - const struct hlsl_ir_node *src) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - assert(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - assert(1 - dst_idx >= 0); - instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; - instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; - instr.dsts[1 - dst_idx].reg.idx_count = 0; - instr.dst_count = 2; - - sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); - sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -/* dp# instructions don't map the swizzle. 
*/ -static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, - const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - - assert(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - assert(1 - dst_idx >= 0); - instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; - instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; - instr.dsts[1 - dst_idx].reg.idx_count = 0; - instr.dst_count = 2; - - sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[dst_idx].writemask); - sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_constant(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_constant *constant) -{ - const unsigned int dimx = constant->node.data_type->dimx; - struct sm4_instruction instr; - struct sm4_register *reg = &instr.srcs[0].reg; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_dst_from_node(&instr.dsts[0], &constant->node); - instr.dst_count = 1; - - instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - reg->type = VKD3D_SM4_RT_IMMCONST; - if (dimx == 1) - { - 
reg->dim = VKD3D_SM4_DIMENSION_SCALAR; - reg->immconst_uint[0] = constant->value[0].u; - } - else - { - unsigned int i, j = 0; - - reg->dim = VKD3D_SM4_DIMENSION_VEC4; - for (i = 0; i < 4; ++i) - { - if (instr.dsts[0].writemask & (1u << i)) - reg->immconst_uint[i] = constant->value[j++].u; - } - } - instr.src_count = 1, - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *texel_offset) -{ - bool uav = (resource_type->base_type == HLSL_TYPE_UAV); - struct sm4_instruction instr; - unsigned int dim_count; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = uav ? VKD3D_SM5_OP_LD_UAV_TYPED : VKD3D_SM4_OP_LD; - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } - } - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - - if (!uav) - { - /* Mipmap level is in the last component in the IR, but needs to be in the W - * component in the instruction. 
*/ - dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); - if (dim_count == 1) - instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4); - if (dim_count == 2) - instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, Y, X, Z), 4); - } - - sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); - - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, const struct hlsl_ir_node *texel_offset) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_SAMPLE; - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7."); - return; - } - } - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); - sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 3; - - write_sm4_instruction(buffer, &instr); -} - -static bool type_is_float(const struct hlsl_type *type) -{ - return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; -} - -static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, - const struct hlsl_ir_node *arg, uint32_t mask) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = 
VKD3D_SM4_OP_AND; - - sm4_dst_from_node(&instr.dsts[0], &expr->node); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); - instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; - instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; - instr.srcs[1].reg.immconst_uint[0] = mask; - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_cast(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) -{ - static const union - { - uint32_t u; - float f; - } one = { .f = 1.0 }; - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_type *dst_type = expr->node.data_type; - const struct hlsl_type *src_type = arg1->data_type; - - /* Narrowing casts were already lowered. */ - assert(src_type->dimx == dst_type->dimx); - - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - switch (src_type->base_type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_INT: - switch (src_type->base_type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); - break; - - case 
HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_UINT: - switch (src_type->base_type) - { - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_TYPE_BOOL: - write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); - break; - - default: - vkd3d_unreachable(); - } - break; - - case HLSL_TYPE_HALF: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to half."); - break; - - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); - break; - - case HLSL_TYPE_BOOL: - /* Casts to bool should have already been lowered. */ - default: - vkd3d_unreachable(); - } -} - -static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; - - sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); - instr.src_count = 2; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_expr(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) -{ - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_ir_node *arg2 = expr->operands[1].node; - const struct hlsl_type *dst_type = expr->node.data_type; - struct 
vkd3d_string_buffer *dst_type_string; - - assert(expr->node.reg.allocated); - - if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) - return; - - switch (expr->op) - { - case HLSL_OP1_ABS: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP1_BIT_NOT: - assert(type_is_integer(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_CAST: - write_sm4_cast(ctx, buffer, expr); - break; - - case HLSL_OP1_COS: - assert(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); - break; - - case HLSL_OP1_EXP2: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FLOOR: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FRACT: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOG2: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOGIC_NOT: - assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_NEG: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", 
dst_type_string->buffer); - } - break; - - case HLSL_OP1_REINTERPRET: - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_OP1_ROUND: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_RSQ: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); - break; - - case HLSL_OP1_SAT: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV - | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), - &expr->node, arg1, 0); - break; - - case HLSL_OP1_SIN: - assert(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); - break; - - case HLSL_OP1_SQRT: - assert(type_is_float(dst_type)); - write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); - break; - - case HLSL_OP2_ADD: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_BIT_AND: - assert(type_is_integer(dst_type)); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_OR: - assert(type_is_integer(dst_type)); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_XOR: - assert(type_is_integer(dst_type)); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_DIV: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: - 
write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_DOT: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - switch (arg1->data_type->dimx) - { - case 4: - write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); - break; - - case 3: - write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); - break; - - case 2: - write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); - break; - - case 1: - default: - vkd3d_unreachable(); - } - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_EQUAL: - { - const struct hlsl_type *src_type = arg1->data_type; - - assert(dst_type->base_type == HLSL_TYPE_BOOL); - - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", - debug_hlsl_type(ctx, src_type)); - break; - } - break; - } - - case HLSL_OP2_GEQUAL: - { - const struct hlsl_type *src_type = arg1->data_type; - - assert(dst_type->base_type == HLSL_TYPE_BOOL); - - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" 
operands.", - debug_hlsl_type(ctx, src_type)); - break; - } - break; - } - - case HLSL_OP2_LESS: - { - const struct hlsl_type *src_type = arg1->data_type; - - assert(dst_type->base_type == HLSL_TYPE_BOOL); - - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", - debug_hlsl_type(ctx, src_type)); - break; - } - break; - } - - case HLSL_OP2_LOGIC_AND: - assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LOGIC_OR: - assert(dst_type->base_type == HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LSHIFT: - assert(type_is_integer(dst_type)); - assert(dst_type->base_type != HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_MAX: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_MIN: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, 
&expr->node, arg1, arg2); - break; - - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_MOD: - switch (dst_type->base_type) - { - case HLSL_TYPE_UINT: - write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_MUL: - switch (dst_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - /* Using IMUL instead of UMUL because we're taking the low - * bits, and the native compiler generates IMUL. */ - write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); - } - break; - - case HLSL_OP2_NEQUAL: - { - const struct hlsl_type *src_type = arg1->data_type; - - assert(dst_type->base_type == HLSL_TYPE_BOOL); - - switch (src_type->base_type) - { - case HLSL_TYPE_FLOAT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", - debug_hlsl_type(ctx, src_type)); - break; - } - break; - } - - case HLSL_OP2_RSHIFT: - assert(type_is_integer(dst_type)); - assert(dst_type->base_type != HLSL_TYPE_BOOL); - write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, - &expr->node, arg1, arg2); - break; - - default: - hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); - } - - hlsl_release_string_buffer(ctx, dst_type_string); -} - -static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, - .src_count = 1, - }; - - assert(iff->condition.node->data_type->dimx == 1); - - sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); - write_sm4_instruction(buffer, &instr); - - write_sm4_block(ctx, buffer, &iff->then_instrs); - - if (!list_empty(&iff->else_instrs.instrs)) - { - instr.opcode = VKD3D_SM4_OP_ELSE; - instr.src_count = 0; - write_sm4_instruction(buffer, &instr); - - write_sm4_block(ctx, buffer, &iff->else_instrs); - } - - instr.opcode = VKD3D_SM4_OP_ENDIF; - instr.src_count = 0; - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_jump(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) -{ - struct sm4_instruction instr = {0}; - - switch (jump->type) - { - case HLSL_IR_JUMP_BREAK: - instr.opcode = VKD3D_SM4_OP_BREAK; - break; - - case HLSL_IR_JUMP_RETURN: - vkd3d_unreachable(); - - default: - hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); - return; - } - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_load(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) -{ - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_dst_from_node(&instr.dsts[0], &load->node); - instr.dst_count = 1; - - sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, load->node.data_type, instr.dsts[0].writemask); - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static 
void write_sm4_loop(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) -{ - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_LOOP, - }; - - write_sm4_instruction(buffer, &instr); - - write_sm4_block(ctx, buffer, &loop->body); - - instr.opcode = VKD3D_SM4_OP_ENDLOOP; - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) -{ - struct sm4_src_register *src; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - - instr.opcode = VKD3D_SM4_OP_GATHER4; - - sm4_dst_from_node(&instr.dsts[0], dst); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); - - if (texel_offset) - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { - if (ctx->profile->major_version < 5) - { - hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); - return; - } - instr.opcode = VKD3D_SM5_OP_GATHER4_PO; - sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); - } - } - - sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); - - src = &instr.srcs[instr.src_count++]; - sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); - src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; - src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; - src->swizzle = swizzle; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_resource_load(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load 
*load) -{ - const struct hlsl_type *resource_type = load->resource.var->data_type; - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *coords = load->coords.node; - - if (resource_type->type != HLSL_CLASS_OBJECT) - { - assert(resource_type->type == HLSL_CLASS_ARRAY || resource_type->type == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &load->node.loc, "Resource being a component of another variable."); - return; - } - - if (load->sampler.var) - { - const struct hlsl_type *sampler_type = load->sampler.var->data_type; - - if (sampler_type->type != HLSL_CLASS_OBJECT) - { - assert(sampler_type->type == HLSL_CLASS_ARRAY || sampler_type->type == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &load->node.loc, "Sampler being a component of another variable."); - return; - } - assert(sampler_type->base_type == HLSL_TYPE_SAMPLER); - assert(sampler_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC); - - if (!load->sampler.var->is_uniform) - { - hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); - return; - } - } - - if (!load->resource.var->is_uniform) - { - hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); - return; - } - - switch (load->load_type) - { - case HLSL_RESOURCE_LOAD: - write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, - coords, texel_offset); - break; - - case HLSL_RESOURCE_SAMPLE: - if (!load->sampler.var) - { - hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); - return; - } - write_sm4_sample(ctx, buffer, resource_type, &load->node, - &load->resource, &load->sampler, coords, texel_offset); - break; - - case HLSL_RESOURCE_GATHER_RED: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_GREEN: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Y, Y, 
Y, Y), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_BLUE: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); - break; - - case HLSL_RESOURCE_GATHER_ALPHA: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); - break; - - case HLSL_RESOURCE_SAMPLE_LOD: - hlsl_fixme(ctx, &load->node.loc, "SM4 sample-LOD expression."); - break; - } -} - -static void write_sm4_resource_store(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) -{ - const struct hlsl_type *resource_type = store->resource.var->data_type; - - if (resource_type->type != HLSL_CLASS_OBJECT) - { - assert(resource_type->type == HLSL_CLASS_ARRAY || resource_type->type == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &store->node.loc, "Resource being a component of another variable."); - return; - } - - if (!store->resource.var->is_uniform) - { - hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); - return; - } - - write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); -} - -static void write_sm4_store(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) -{ - const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm4_instruction instr; - unsigned int writemask; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); - instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); - instr.dst_count = 1; - - sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_swizzle(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, 
const struct hlsl_ir_swizzle *swizzle) -{ - struct sm4_instruction instr; - unsigned int writemask; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_dst_from_node(&instr.dsts[0], &swizzle->node); - instr.dst_count = 1; - - sm4_register_from_node(&instr.srcs[0].reg, &writemask, &instr.srcs[0].swizzle_type, swizzle->val.node); - instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), - swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - -static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_block *block) -{ - const struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { - if (instr->data_type) - { - if (instr->data_type->type == HLSL_CLASS_MATRIX) - { - hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); - break; - } - else if (instr->data_type->type == HLSL_CLASS_OBJECT) - { - hlsl_fixme(ctx, &instr->loc, "Object copy."); - break; - } - - assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR); - } - - switch (instr->type) - { - case HLSL_IR_CALL: - vkd3d_unreachable(); - - case HLSL_IR_CONSTANT: - write_sm4_constant(ctx, buffer, hlsl_ir_constant(instr)); - break; - - case HLSL_IR_EXPR: - write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); - break; - - case HLSL_IR_IF: - write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); - break; - - case HLSL_IR_JUMP: - write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); - break; - - case HLSL_IR_LOAD: - write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); - break; - - case HLSL_IR_RESOURCE_LOAD: - write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); - break; - - case HLSL_IR_RESOURCE_STORE: - write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); - break; - - case HLSL_IR_LOOP: 
- write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); - break; - - case HLSL_IR_STORE: - write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); - break; - - case HLSL_IR_SWIZZLE: - write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); - break; - - default: - hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } - } -} - -static void write_sm4_shdr(struct hlsl_ctx *ctx, - const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) -{ - const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; - struct vkd3d_bytecode_buffer buffer = {0}; - unsigned int extern_resources_count, i; - const struct hlsl_buffer *cbuffer; - const struct hlsl_ir_var *var; - size_t token_count_position; - - static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = - { - VKD3D_SM4_PS, - VKD3D_SM4_VS, - VKD3D_SM4_GS, - VKD3D_SM5_HS, - VKD3D_SM5_DS, - VKD3D_SM5_CS, - 0, /* EFFECT */ - 0, /* TEXTURE */ - VKD3D_SM4_LIB, - }; - - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - - put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); - token_count_position = put_u32(&buffer, 0); - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) - write_sm4_dcl_constant_buffer(&buffer, cbuffer); - } - - for (i = 0; i < extern_resources_count; ++i) - { - var = extern_resources[i]; - - if (var->data_type->base_type == HLSL_TYPE_SAMPLER) - write_sm4_dcl_sampler(&buffer, var); - else if (var->data_type->base_type == HLSL_TYPE_TEXTURE || var->data_type->base_type == HLSL_TYPE_UAV) - write_sm4_dcl_texture(&buffer, var); - } - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) - write_sm4_dcl_semantic(ctx, &buffer, var); - } - - if 
(profile->type == VKD3D_SHADER_TYPE_COMPUTE) - write_sm4_dcl_thread_group(&buffer, ctx->thread_count); - - if (ctx->temp_count) - write_sm4_dcl_temps(&buffer, ctx->temp_count); - - write_sm4_block(ctx, &buffer, &entry_func->body); - - write_sm4_ret(&buffer); - - set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - - dxbc_writer_add_section(dxbc, TAG_SHDR, buffer.data, buffer.size); - - vkd3d_free(extern_resources); -} - -int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -{ - struct dxbc_writer dxbc; - size_t i; - int ret; - - dxbc_writer_init(&dxbc); - - write_sm4_signature(ctx, &dxbc, false); - write_sm4_signature(ctx, &dxbc, true); - write_sm4_rdef(ctx, &dxbc); - write_sm4_shdr(ctx, entry_func, &dxbc); - - if (!(ret = ctx->result)) - ret = dxbc_writer_write(&dxbc, out); - for (i = 0; i < dxbc.section_count; ++i) - vkd3d_shader_free_shader_code(&dxbc.sections[i].data); - return ret; -} diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c new file mode 100644 index 00000000000..9eefb82c226 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -0,0 +1,1072 @@ +/* + * Copyright 2023 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" + +static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) +{ + return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; +} + +static bool shader_instruction_is_dcl(const struct vkd3d_shader_instruction *ins) +{ + return (VKD3DSIH_DCL <= ins->handler_idx && ins->handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) + || ins->handler_idx == VKD3DSIH_HS_DECLS; +} + +static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) +{ + ins->handler_idx = VKD3DSIH_NOP; + ins->dst_count = 0; + ins->src_count = 0; + ins->dst = NULL; + ins->src = NULL; +} + +static void shader_register_eliminate_phase_addressing(struct vkd3d_shader_register *reg, + unsigned int instance_id) +{ + unsigned int i; + + for (i = 0; i < reg->idx_count; ++i) + { + if (reg->idx[i].rel_addr && shader_register_is_phase_instance_id(&reg->idx[i].rel_addr->reg)) + { + reg->idx[i].rel_addr = NULL; + reg->idx[i].offset += instance_id; + } + } +} + +static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_instruction *ins, + unsigned int instance_id) +{ + struct vkd3d_shader_register *reg; + unsigned int i; + + for (i = 0; i < ins->src_count; ++i) + { + reg = (struct vkd3d_shader_register *)&ins->src[i].reg; + if (shader_register_is_phase_instance_id(reg)) + { + reg->type = VKD3DSPR_IMMCONST; + reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + reg->non_uniform = false; + reg->idx[0].offset = ~0u; + reg->idx[0].rel_addr = NULL; + reg->idx[1].offset = ~0u; + reg->idx[1].rel_addr = NULL; + reg->idx[2].offset = ~0u; + reg->idx[2].rel_addr = NULL; + reg->idx_count = 0; + reg->immconst_type = VKD3D_IMMCONST_SCALAR; + reg->u.immconst_uint[0] = instance_id;
+ continue; + } + shader_register_eliminate_phase_addressing(reg, instance_id); + } + + for (i = 0; i < ins->dst_count; ++i) + shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); +} + +struct hull_flattener +{ + struct vkd3d_shader_instruction_array instructions; + + unsigned int max_temp_count; + unsigned int temp_dcl_idx; + + unsigned int instance_count; + unsigned int phase_body_idx; + enum vkd3d_shader_opcode phase; +}; + +static bool flattener_is_in_fork_or_join_phase(const struct hull_flattener *flattener) +{ + return flattener->phase == VKD3DSIH_HS_FORK_PHASE || flattener->phase == VKD3DSIH_HS_JOIN_PHASE; +} + +struct shader_phase_location +{ + unsigned int index; + unsigned int instance_count; + unsigned int instruction_count; +}; + +struct shader_phase_location_array +{ + /* Unlikely worst case: one phase for each component of each output register. */ + struct shader_phase_location locations[MAX_REG_OUTPUT * VKD3D_VEC4_SIZE]; + unsigned int count; +}; + +static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normaliser, + unsigned int index, struct shader_phase_location_array *locations) +{ + struct vkd3d_shader_instruction *ins = &normaliser->instructions.elements[index]; + struct shader_phase_location *loc; + bool b; + + if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) + { + b = flattener_is_in_fork_or_join_phase(normaliser); + /* Reset the phase info. */ + normaliser->phase_body_idx = ~0u; + normaliser->phase = ins->handler_idx; + normaliser->instance_count = 1; + /* Leave the first occurrence and delete the rest. 
*/ + if (b) + vkd3d_shader_instruction_make_nop(ins); + return; + } + else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT + || ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) + { + normaliser->instance_count = ins->declaration.count + !ins->declaration.count; + vkd3d_shader_instruction_make_nop(ins); + return; + } + else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( + &ins->declaration.dst.reg)) + { + vkd3d_shader_instruction_make_nop(ins); + return; + } + else if (ins->handler_idx == VKD3DSIH_DCL_TEMPS && normaliser->phase != VKD3DSIH_INVALID) + { + /* Leave only the first temp declaration and set it to the max count later. */ + if (!normaliser->max_temp_count) + normaliser->temp_dcl_idx = index; + else + vkd3d_shader_instruction_make_nop(ins); + normaliser->max_temp_count = max(normaliser->max_temp_count, ins->declaration.count); + return; + } + + if (normaliser->phase == VKD3DSIH_INVALID || shader_instruction_is_dcl(ins)) + return; + + if (normaliser->phase_body_idx == ~0u) + normaliser->phase_body_idx = index; + + if (ins->handler_idx == VKD3DSIH_RET) + { + vkd3d_shader_instruction_make_nop(ins); + if (locations->count >= ARRAY_SIZE(locations->locations)) + { + FIXME("Insufficient space for phase location.\n"); + return; + } + loc = &locations->locations[locations->count++]; + loc->index = normaliser->phase_body_idx; + loc->instance_count = normaliser->instance_count; + loc->instruction_count = index - normaliser->phase_body_idx; + } +} + +static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normaliser, + struct shader_phase_location_array *locations) +{ + struct shader_phase_location *loc; + unsigned int i, j, k, end, count; + + for (i = 0, count = 0; i < locations->count; ++i) + count += (locations->locations[i].instance_count - 1) * locations->locations[i].instruction_count; + + if (!shader_instruction_array_reserve(&normaliser->instructions, 
normaliser->instructions.count + count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + end = normaliser->instructions.count; + normaliser->instructions.count += count; + + for (i = locations->count; i > 0; --i) + { + loc = &locations->locations[i - 1]; + j = loc->index + loc->instruction_count; + memmove(&normaliser->instructions.elements[j + count], &normaliser->instructions.elements[j], + (end - j) * sizeof(*normaliser->instructions.elements)); + end = j; + count -= (loc->instance_count - 1) * loc->instruction_count; + loc->index += count; + } + + for (i = 0, count = 0; i < locations->count; ++i) + { + loc = &locations->locations[i]; + /* Make a copy of the non-dcl instructions for each instance. */ + for (j = 1; j < loc->instance_count; ++j) + { + for (k = 0; k < loc->instruction_count; ++k) + { + if (!shader_instruction_array_clone_instruction(&normaliser->instructions, + loc->index + loc->instruction_count * j + k, loc->index + k)) + return VKD3D_ERROR_OUT_OF_MEMORY; + } + } + /* Replace each reference to the instance id with a constant instance id. 
*/ + for (j = 0; j < loc->instance_count; ++j) + { + for (k = 0; k < loc->instruction_count; ++k) + shader_instruction_eliminate_phase_instance_id( + &normaliser->instructions.elements[loc->index + loc->instruction_count * j + k], j); + } + } + + return VKD3D_OK; +} + +static void shader_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) +{ + reg->type = reg_type; + reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + reg->non_uniform = false; + reg->data_type = data_type; + reg->idx[0].offset = ~0u; + reg->idx[0].rel_addr = NULL; + reg->idx[1].offset = ~0u; + reg->idx[1].rel_addr = NULL; + reg->idx[2].offset = ~0u; + reg->idx[2].rel_addr = NULL; + reg->idx_count = idx_count; + reg->immconst_type = VKD3D_IMMCONST_SCALAR; +} + +static void shader_instruction_init(struct vkd3d_shader_instruction *ins, enum vkd3d_shader_opcode handler_idx) +{ + memset(ins, 0, sizeof(*ins)); + ins->handler_idx = handler_idx; +} + +enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions) +{ + struct hull_flattener flattener = {*src_instructions}; + struct vkd3d_shader_instruction_array *instructions; + struct shader_phase_location_array locations; + enum vkd3d_result result = VKD3D_OK; + unsigned int i; + + instructions = &flattener.instructions; + + flattener.phase = VKD3DSIH_INVALID; + for (i = 0, locations.count = 0; i < instructions->count; ++i) + flattener_eliminate_phase_related_dcls(&flattener, i, &locations); + + if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) + return result; + + if (flattener.phase != VKD3DSIH_INVALID) + { + if (flattener.temp_dcl_idx) + instructions->elements[flattener.temp_dcl_idx].declaration.count = flattener.max_temp_count; + + if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + 
shader_instruction_init(&instructions->elements[instructions->count++], VKD3DSIH_RET); + } + + *src_instructions = flattener.instructions; + return result; +} + +struct control_point_normaliser +{ + struct vkd3d_shader_instruction_array instructions; + enum vkd3d_shader_opcode phase; + struct vkd3d_shader_src_param *outpointid_param; +}; + +static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser) +{ + return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; +} + +static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( + struct vkd3d_shader_instruction_array *instructions) +{ + struct vkd3d_shader_src_param *rel_addr; + + if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) + return NULL; + + shader_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0); + rel_addr->swizzle = 0; + rel_addr->modifiers = 0; + + return rel_addr; +} + +static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param, + struct control_point_normaliser *normaliser) +{ + struct vkd3d_shader_register *reg = &dst_param->reg; + + if (control_point_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) + { + /* The TPF reader validates idx_count. */ + assert(reg->idx_count == 1); + reg->idx[1] = reg->idx[0]; + /* The control point id param is implicit here. Avoid later complications by inserting it. 
*/ + reg->idx[0].offset = 0; + reg->idx[0].rel_addr = normaliser->outpointid_param; + ++reg->idx_count; + } +} + +static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e, + enum vkd3d_shader_register_type reg_type, unsigned int idx_count) +{ + param->write_mask = e->mask; + param->modifiers = 0; + param->shift = 0; + shader_register_init(&param->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); +} + +static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser, + const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst) +{ + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_dst_param *param; + const struct signature_element *e; + unsigned int i, count; + + for (i = 0, count = 1; i < s->element_count; ++i) + count += !!s->elements[i].used_mask; + + if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + memmove(&normaliser->instructions.elements[dst + count], &normaliser->instructions.elements[dst], + (normaliser->instructions.count - dst) * sizeof(*normaliser->instructions.elements)); + normaliser->instructions.count += count; + + ins = &normaliser->instructions.elements[dst]; + shader_instruction_init(ins, VKD3DSIH_HS_CONTROL_POINT_PHASE); + ins->flags = 1; + ++ins; + + for (i = 0; i < s->element_count; ++i) + { + e = &s->elements[i]; + if (!e->used_mask) + continue; + + if (e->sysval_semantic != VKD3D_SHADER_SV_NONE) + { + shader_instruction_init(ins, VKD3DSIH_DCL_INPUT_SIV); + param = &ins->declaration.register_semantic.reg; + ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); + } + else + { + shader_instruction_init(ins, VKD3DSIH_DCL_INPUT); + param = &ins->declaration.dst; + } + + shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2); +
param->reg.idx[0].offset = input_control_point_count; + param->reg.idx[1].offset = i; + + ++ins; + } + + return VKD3D_OK; +} + +enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( + struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature) +{ + struct vkd3d_shader_instruction_array *instructions; + struct control_point_normaliser normaliser; + unsigned int input_control_point_count; + struct vkd3d_shader_instruction *ins; + enum vkd3d_result ret; + unsigned int i, j; + + if (!(normaliser.outpointid_param = instruction_array_create_outpointid_param(src_instructions))) + { + ERR("Failed to allocate src param.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + normaliser.instructions = *src_instructions; + instructions = &normaliser.instructions; + normaliser.phase = VKD3DSIH_INVALID; + + for (i = 0; i < normaliser.instructions.count; ++i) + { + ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + normaliser.phase = ins->handler_idx; + break; + default: + if (shader_instruction_is_dcl(ins)) + break; + for (j = 0; j < ins->dst_count; ++j) + shader_dst_param_normalise_outpointid((struct vkd3d_shader_dst_param *)&ins->dst[j], &normaliser); + break; + } + } + + normaliser.phase = VKD3DSIH_INVALID; + input_control_point_count = 1; + + for (i = 0; i < instructions->count; ++i) + { + ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: + input_control_point_count = ins->declaration.count; + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + *src_instructions = normaliser.instructions; + return VKD3D_OK; + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + ret = control_point_normaliser_emit_hs_input(&normaliser, input_signature, + input_control_point_count, i); + *src_instructions = normaliser.instructions; + return ret; + 
default: + break; + } + } + + *src_instructions = normaliser.instructions; + return VKD3D_OK; +} + +struct io_normaliser +{ + struct vkd3d_shader_instruction_array instructions; + enum vkd3d_shader_type shader_type; + struct shader_signature *input_signature; + struct shader_signature *output_signature; + struct shader_signature *patch_constant_signature; + + unsigned int max_temp_count; + unsigned int temp_dcl_idx; + + unsigned int instance_count; + unsigned int phase_body_idx; + enum vkd3d_shader_opcode phase; + unsigned int output_control_point_count; + + struct vkd3d_shader_src_param *outpointid_param; + + struct vkd3d_shader_dst_param *input_dcl_params[MAX_REG_OUTPUT]; + struct vkd3d_shader_dst_param *output_dcl_params[MAX_REG_OUTPUT]; + struct vkd3d_shader_dst_param *pc_dcl_params[MAX_REG_OUTPUT]; + uint8_t input_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; + uint8_t output_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; + uint8_t pc_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; +}; + +static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *normaliser) +{ + return normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE; +} + +static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser *normaliser) +{ + return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; +} + +static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature, + unsigned int reg_idx, unsigned int write_mask) +{ + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + struct signature_element *e = &signature->elements[i]; + if (e->register_index <= reg_idx && e->register_index + e->register_count > reg_idx + && (e->mask & write_mask) == write_mask) + { + return i; + } + } + + /* Validated in the TPF reader. 
*/ + vkd3d_unreachable(); +} + +static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE], + unsigned int register_idx, unsigned int write_mask) +{ + return range_map[register_idx][vkd3d_write_mask_get_component_idx(write_mask)]; +} + +static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], unsigned int register_idx, + unsigned int register_count, unsigned int write_mask, bool is_dcl_indexrange) +{ + unsigned int i, j, r, c, component_idx, component_count; + + assert(write_mask <= VKD3DSP_WRITEMASK_ALL); + component_idx = vkd3d_write_mask_get_component_idx(write_mask); + component_count = vkd3d_write_mask_component_count(write_mask); + + assert(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); + + if (range_map[register_idx][component_idx] > register_count && is_dcl_indexrange) + { + /* Validated in the TPF reader. */ + assert(range_map[register_idx][component_idx] != UINT8_MAX); + return; + } + if (range_map[register_idx][component_idx] == register_count) + { + /* Already done. This happens when fxc splits a register declaration by + * component(s). The dcl_indexrange instructions are split too. */ + return; + } + range_map[register_idx][component_idx] = register_count; + + for (i = 0; i < register_count; ++i) + { + r = register_idx + i; + for (j = !i; j < component_count; ++j) + { + c = component_idx + j; + /* A synthetic patch constant range which overlaps an existing range can start upstream of it + * for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap. + * The latter is validated in the TPF reader. 
*/ + assert(!range_map[r][c] || !is_dcl_indexrange); + range_map[r][c] = UINT8_MAX; + } + } +} + +static void io_normaliser_add_index_range(struct io_normaliser *normaliser, + const struct vkd3d_shader_instruction *ins) +{ + const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; + const struct vkd3d_shader_register *reg = &range->dst.reg; + unsigned int reg_idx, write_mask, element_idx; + const struct shader_signature *signature; + uint8_t (*range_map)[VKD3D_VEC4_SIZE]; + + switch (reg->type) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_INCONTROLPOINT: + range_map = normaliser->input_range_map; + signature = normaliser->input_signature; + break; + case VKD3DSPR_OUTCONTROLPOINT: + range_map = normaliser->output_range_map; + signature = normaliser->output_signature; + break; + case VKD3DSPR_OUTPUT: + if (!io_normaliser_is_in_fork_or_join_phase(normaliser)) + { + range_map = normaliser->output_range_map; + signature = normaliser->output_signature; + break; + } + /* fall through */ + case VKD3DSPR_PATCHCONST: + range_map = normaliser->pc_range_map; + signature = normaliser->patch_constant_signature; + break; + default: + /* Validated in the TPF reader. */ + vkd3d_unreachable(); + } + + reg_idx = reg->idx[reg->idx_count - 1].offset; + write_mask = range->dst.write_mask; + element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + range_map_set_register_range(range_map, reg_idx, range->register_count, + signature->elements[element_idx].mask, true); +} + +static int signature_element_mask_compare(const void *a, const void *b) +{ + const struct signature_element *e = a, *f = b; + int ret; + + return (ret = vkd3d_u32_compare(e->mask, f->mask)) ? 
ret : vkd3d_u32_compare(e->register_index, f->register_index); +} + +static bool sysval_semantics_should_merge(const struct signature_element *e, const struct signature_element *f) +{ + if (e->sysval_semantic < VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE + || e->sysval_semantic > VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) + return false; + + return e->sysval_semantic == f->sysval_semantic + /* Line detail and density must be merged together to match the SPIR-V array. + * This deletes one of the two sysvals, but these are not used. */ + || (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET + && f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) + || (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN + && f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET); +} + +/* Merge tess factor sysvals because they are an array in SPIR-V. */ +static void shader_signature_map_patch_constant_index_ranges(struct shader_signature *s, + uint8_t range_map[][VKD3D_VEC4_SIZE]) +{ + struct signature_element *e, *f; + unsigned int i, j, register_count; + + qsort(s->elements, s->element_count, sizeof(s->elements[0]), signature_element_mask_compare); + + for (i = 0; i < s->element_count; i += register_count) + { + e = &s->elements[i]; + register_count = 1; + + if (!e->sysval_semantic) + continue; + + for (j = i + 1; j < s->element_count; ++j, ++register_count) + { + f = &s->elements[j]; + if (f->register_index != e->register_index + register_count || !sysval_semantics_should_merge(e, f)) + break; + } + if (register_count < 2) + continue; + + range_map_set_register_range(range_map, e->register_index, register_count, e->mask, false); + } +} + +static int signature_element_register_compare(const void *a, const void *b) +{ + const struct signature_element *e = a, *f = b; + + return vkd3d_u32_compare(e->register_index, f->register_index); +} + +static int signature_element_index_compare(const void *a, const void *b) +{ + const struct signature_element *e = a, *f = b; + + 
return vkd3d_u32_compare(e->sort_index, f->sort_index); +} + +static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map[][VKD3D_VEC4_SIZE], + bool is_patch_constant) +{ + unsigned int i, j, element_count, new_count, register_count; + struct signature_element *elements; + struct signature_element *e, *f; + + element_count = s->element_count; + if (!(elements = vkd3d_malloc(element_count * sizeof(*elements)))) + return false; + memcpy(elements, s->elements, element_count * sizeof(*elements)); + + qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare); + + for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e) + { + e = &elements[i]; + j = i + 1; + + if (e->register_index == ~0u) + continue; + + /* Do not merge if the register index will be relative-addressed. */ + if (range_map_get_register_count(range_map, e->register_index, e->mask) > 1) + continue; + + for (; j < element_count; ++j) + { + f = &elements[j]; + + /* Merge different components of the same register unless sysvals are different, + * or it will be relative-addressed. 
*/ + if (f->register_index != e->register_index || f->sysval_semantic != e->sysval_semantic + || range_map_get_register_count(range_map, f->register_index, f->mask) > 1) + break; + + TRACE("Merging %s, reg %u, mask %#x, sysval %#x with %s, mask %#x, sysval %#x.\n", e->semantic_name, + e->register_index, e->mask, e->sysval_semantic, f->semantic_name, f->mask, f->sysval_semantic); + assert(!(e->mask & f->mask)); + + e->mask |= f->mask; + e->used_mask |= f->used_mask; + e->semantic_index = min(e->semantic_index, f->semantic_index); + } + } + element_count = new_count; + vkd3d_free(s->elements); + s->elements = elements; + s->element_count = element_count; + + if (is_patch_constant) + shader_signature_map_patch_constant_index_ranges(s, range_map); + + for (i = 0, new_count = 0; i < element_count; i += register_count, elements[new_count++] = *e) + { + e = &elements[i]; + register_count = 1; + + if (e->register_index >= MAX_REG_OUTPUT) + continue; + + register_count = range_map_get_register_count(range_map, e->register_index, e->mask); + assert(register_count != UINT8_MAX); + register_count += !register_count; + + if (register_count > 1) + { + TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count); + e->register_count = register_count; + } + } + element_count = new_count; + + /* Restoring the original order is required for sensible trace output. 
*/ + qsort(elements, element_count, sizeof(elements[0]), signature_element_index_compare); + + s->element_count = element_count; + + return true; +} + +static bool sysval_semantic_is_tess_factor(enum vkd3d_shader_sysval_semantic sysval_semantic) +{ + return sysval_semantic >= VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE + && sysval_semantic <= VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; +} + +static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg, + unsigned int id_idx, unsigned int register_index) +{ + assert(id_idx < ARRAY_SIZE(reg->idx) - 1); + + /* For a relative-addressed register index, move the id up a slot to separate it from the address, + * because rel_addr can be replaced with a constant offset in some cases. */ + if (reg->idx[id_idx].rel_addr) + { + reg->idx[id_idx + 1].rel_addr = NULL; + reg->idx[id_idx + 1].offset = reg->idx[id_idx].offset; + reg->idx[id_idx].offset -= register_index; + ++id_idx; + } + /* Otherwise we have no address for the arrayed register, so insert one. This happens e.g. where + * tessellation level registers are merged into an array because they're an array in SPIR-V. 
*/ + else + { + ++id_idx; + memmove(&reg->idx[1], &reg->idx[0], id_idx * sizeof(reg->idx[0])); + reg->idx[0].rel_addr = NULL; + reg->idx[0].offset = reg->idx[id_idx].offset - register_index; + } + + return id_idx; +} + +static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, bool is_io_dcl, + struct io_normaliser *normaliser) + { + unsigned int id_idx, reg_idx, write_mask, element_idx; + struct vkd3d_shader_register *reg = &dst_param->reg; + struct vkd3d_shader_dst_param **dcl_params; + const struct shader_signature *signature; + const struct signature_element *e; + + if ((reg->type == VKD3DSPR_OUTPUT && io_normaliser_is_in_fork_or_join_phase(normaliser)) + || reg->type == VKD3DSPR_PATCHCONST) + { + signature = normaliser->patch_constant_signature; + /* Convert patch constant outputs to the patch constant register type to avoid the need + * to convert compiler symbols when accessed as inputs in a later stage. */ + reg->type = VKD3DSPR_PATCHCONST; + dcl_params = normaliser->pc_dcl_params; + } + else if (reg->type == VKD3DSPR_OUTPUT || dst_param->reg.type == VKD3DSPR_COLOROUT) + { + signature = normaliser->output_signature; + dcl_params = normaliser->output_dcl_params; + } + else if (dst_param->reg.type == VKD3DSPR_INCONTROLPOINT || dst_param->reg.type == VKD3DSPR_INPUT) + { + signature = normaliser->input_signature; + dcl_params = normaliser->input_dcl_params; + } + else + { + return true; + } + + id_idx = reg->idx_count - 1; + reg_idx = reg->idx[id_idx].offset; + write_mask = dst_param->write_mask; + element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + e = &signature->elements[element_idx]; + + dst_param->write_mask >>= vkd3d_write_mask_get_component_idx(e->mask); + if (is_io_dcl) + { + /* Validated in the TPF reader. */ + assert(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); + + if (dcl_params[element_idx]) + { + /* Merge split declarations into a single one.
*/ + dcl_params[element_idx]->write_mask |= dst_param->write_mask; + /* Turn this into a nop. */ + return false; + } + else + { + dcl_params[element_idx] = dst_param; + } + } + + if (io_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) + { + if (is_io_dcl) + { + /* Emit an array size for the control points for consistency with inputs. */ + reg->idx[0].offset = normaliser->output_control_point_count; + } + else + { + /* The control point id param. */ + assert(reg->idx[0].rel_addr); + } + id_idx = 1; + } + + if ((e->register_count > 1 || sysval_semantic_is_tess_factor(e->sysval_semantic))) + { + if (is_io_dcl) + { + /* For control point I/O, idx 0 contains the control point count. + * Ensure it is moved up to the next slot. */ + reg->idx[id_idx].offset = reg->idx[0].offset; + reg->idx[0].offset = e->register_count; + ++id_idx; + } + else + { + id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); + } + } + + /* Replace the register index with the signature element index */ + reg->idx[id_idx].offset = element_idx; + reg->idx_count = id_idx + 1; + + return true; +} + +static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_param, + struct io_normaliser *normaliser) +{ + unsigned int i, id_idx, reg_idx, write_mask, element_idx, component_idx; + struct vkd3d_shader_register *reg = &src_param->reg; + const struct shader_signature *signature; + const struct signature_element *e; + + /* Input/output registers from one phase can be used as inputs in + * subsequent phases. Specifically: + * + * - Control phase inputs are available as "vicp" in fork and join + * phases. + * - Control phase outputs are available as "vocp" in fork and join + * phases. + * - Fork phase patch constants are available as "vpc" in join + * phases. + * + * We handle "vicp" here by converting INCONTROLPOINT src registers to + * type INPUT so they match the control phase declarations. 
We handle + * "vocp" by converting OUTCONTROLPOINT registers to type OUTPUT. + * Merging fork and join phases handles "vpc". */ + + switch (reg->type) + { + case VKD3DSPR_PATCHCONST: + signature = normaliser->patch_constant_signature; + break; + case VKD3DSPR_INCONTROLPOINT: + if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) + reg->type = VKD3DSPR_INPUT; + /* fall through */ + case VKD3DSPR_INPUT: + signature = normaliser->input_signature; + break; + case VKD3DSPR_OUTCONTROLPOINT: + if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) + reg->type = VKD3DSPR_OUTPUT; + /* fall through */ + case VKD3DSPR_OUTPUT: + signature = normaliser->output_signature; + break; + default: + return; + } + + id_idx = reg->idx_count - 1; + reg_idx = reg->idx[id_idx].offset; + write_mask = VKD3DSP_WRITEMASK_0 << vkd3d_swizzle_get_component(src_param->swizzle, 0); + element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + + e = &signature->elements[element_idx]; + if ((e->register_count > 1 || sysval_semantic_is_tess_factor(e->sysval_semantic))) + id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); + reg->idx[id_idx].offset = element_idx; + reg->idx_count = id_idx + 1; + + if ((component_idx = vkd3d_write_mask_get_component_idx(e->mask))) + { + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + if (vkd3d_swizzle_get_component(src_param->swizzle, i)) + src_param->swizzle -= component_idx << VKD3D_SHADER_SWIZZLE_SHIFT(i); + } +} + +static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins, + struct io_normaliser *normaliser) +{ + struct vkd3d_shader_register *reg; + bool keep = true; + unsigned int i; + + switch (ins->handler_idx) + { + case VKD3DSIH_DCL_INPUT: + if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) + { + reg = &ins->declaration.dst.reg; + /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their + * equivalents were declared earlier, but INCONTROLPOINT 
may be the first occurrence. */ + if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST) + vkd3d_shader_instruction_make_nop(ins); + else if (reg->type == VKD3DSPR_INCONTROLPOINT) + reg->type = VKD3DSPR_INPUT; + } + /* fall through */ + case VKD3DSIH_DCL_INPUT_PS: + case VKD3DSIH_DCL_OUTPUT: + keep = shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser); + break; + case VKD3DSIH_DCL_INPUT_SGV: + case VKD3DSIH_DCL_INPUT_SIV: + case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_PS_SIV: + case VKD3DSIH_DCL_OUTPUT_SIV: + keep = shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, + normaliser); + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + normaliser->phase = ins->handler_idx; + memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params)); + memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params)); + memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params)); + break; + default: + if (shader_instruction_is_dcl(ins)) + break; + for (i = 0; i < ins->dst_count; ++i) + shader_dst_param_io_normalise((struct vkd3d_shader_dst_param *)&ins->dst[i], false, normaliser); + for (i = 0; i < ins->src_count; ++i) + shader_src_param_io_normalise((struct vkd3d_shader_src_param *)&ins->src[i], normaliser); + break; + } + + if (!keep) + shader_instruction_init(ins, VKD3DSIH_NOP); +} + +enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, + enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, + struct shader_signature *output_signature, struct shader_signature *patch_constant_signature) +{ + struct io_normaliser normaliser = {*instructions}; + struct vkd3d_shader_instruction *ins; + bool has_control_point_phase; + unsigned int i, j; + + normaliser.phase = VKD3DSIH_INVALID; + normaliser.shader_type = shader_type; + 
normaliser.input_signature = input_signature; + normaliser.output_signature = output_signature; + normaliser.patch_constant_signature = patch_constant_signature; + + for (i = 0, has_control_point_phase = false; i < instructions->count; ++i) + { + ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: + normaliser.output_control_point_count = ins->declaration.count; + break; + case VKD3DSIH_DCL_INDEX_RANGE: + io_normaliser_add_index_range(&normaliser, ins); + vkd3d_shader_instruction_make_nop(ins); + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + has_control_point_phase = true; + /* fall through */ + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + normaliser.phase = ins->handler_idx; + break; + default: + break; + } + } + + if (normaliser.shader_type == VKD3D_SHADER_TYPE_HULL && !has_control_point_phase) + { + /* Inputs and outputs must match for the default phase, so merge ranges must match too. */ + for (i = 0; i < MAX_REG_OUTPUT; ++i) + { + for (j = 0; j < VKD3D_VEC4_SIZE; ++j) + { + if (!normaliser.input_range_map[i][j] && normaliser.output_range_map[i][j]) + normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j]; + else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j]) + normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j]; + else assert(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); + } + } + } + + if (!shader_signature_merge(input_signature, normaliser.input_range_map, false) + || !shader_signature_merge(output_signature, normaliser.output_range_map, false) + || !shader_signature_merge(patch_constant_signature, normaliser.pc_range_map, true)) + { + *instructions = normaliser.instructions; + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + normaliser.phase = VKD3DSIH_INVALID; + for (i = 0; i < normaliser.instructions.count; ++i) + shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], 
&normaliser); + + *instructions = normaliser.instructions; + return VKD3D_OK; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h index 4860cf5f90e..e1cb75e177c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.h +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h @@ -22,7 +22,7 @@ #define __VKD3D_SHADER_PREPROC_H #include "vkd3d_shader_private.h" -#include "wine/rbtree.h" +#include "rbtree.h" struct preproc_if_state { diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index bb5a6b61de1..94079696280 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -41,6 +41,7 @@ static void update_location(struct preproc_ctx *ctx); %option bison-locations %option extra-type="struct preproc_ctx *" %option never-interactive +%option nodefault %option noinput %option nounput %option noyy_top_state @@ -75,6 +76,7 @@ INT_SUFFIX [uUlL]{0,2} "*/" {yy_pop_state(yyscanner);} <> {yy_pop_state(yyscanner);} . {} +\n {} (\\{NEWLINE}|[^\n])* {return T_STRING;} @@ -176,9 +178,9 @@ INT_SUFFIX [uUlL]{0,2} return T_NEWLINE; } -{WS}+ {} +{WS}+ {} [-()\[\]{},+!*/<>&|^?:] {return yytext[0];} -. {return T_TEXT;} +. 
{return T_TEXT;} %% diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 53e13735937..3542b5fac51 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -18,7 +18,7 @@ */ #include "vkd3d_shader_private.h" -#include "wine/rbtree.h" +#include "rbtree.h" #include #include @@ -168,7 +168,7 @@ static void vkd3d_spirv_validate(const struct vkd3d_shader_code *spirv, #endif /* HAVE_SPIRV_TOOLS */ -static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, +enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, unsigned int index) { switch (sysval) @@ -199,14 +199,9 @@ static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enu } } -static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_shader_sysval_semantic sysval) -{ - return vkd3d_siv_from_sysval_indexed(sysval, 0); -} - #define VKD3D_SPIRV_VERSION 0x00010000 #define VKD3D_SPIRV_GENERATOR_ID 18 -#define VKD3D_SPIRV_GENERATOR_VERSION 7 +#define VKD3D_SPIRV_GENERATOR_VERSION 8 #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) struct vkd3d_spirv_stream @@ -1967,11 +1962,9 @@ struct vkd3d_symbol_register_data uint32_t member_idx; enum vkd3d_shader_component_type component_type; unsigned int write_mask; - uint32_t dcl_mask; unsigned int structure_stride; unsigned int binding_base_idx; bool is_aggregate; /* An aggregate, i.e. a structure or an array. */ - bool is_dynamically_indexed; /* If member_idx is a variable ID instead of a constant. 
*/ }; struct vkd3d_symbol_resource_data @@ -2064,10 +2057,14 @@ static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, symbol->type = VKD3D_SYMBOL_REGISTER; memset(&symbol->key, 0, sizeof(symbol->key)); symbol->key.reg.type = reg->type; - if (vkd3d_shader_register_is_input(reg) && reg->idx[1].offset != ~0u) - symbol->key.reg.idx = reg->idx[1].offset; + if (vkd3d_shader_register_is_input(reg) || vkd3d_shader_register_is_output(reg) + || vkd3d_shader_register_is_patch_constant(reg)) + { + symbol->key.reg.idx = reg->idx_count ? reg->idx[reg->idx_count - 1].offset : ~0u; + assert(!reg->idx_count || symbol->key.reg.idx != ~0u); + } else if (reg->type != VKD3DSPR_IMMCONSTBUFFER) - symbol->key.reg.idx = reg->idx[0].offset; + symbol->key.reg.idx = reg->idx_count ? reg->idx[0].offset : ~0u; } static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, @@ -2080,11 +2077,9 @@ static void vkd3d_symbol_set_register_info(struct vkd3d_symbol *symbol, symbol->info.reg.member_idx = 0; symbol->info.reg.component_type = component_type; symbol->info.reg.write_mask = write_mask; - symbol->info.reg.dcl_mask = 0; symbol->info.reg.structure_stride = 0; symbol->info.reg.binding_base_idx = 0; symbol->info.reg.is_aggregate = false; - symbol->info.reg.is_dynamically_indexed = false; } static void vkd3d_symbol_make_resource(struct vkd3d_symbol *symbol, @@ -2197,11 +2192,7 @@ struct vkd3d_push_constant_buffer_binding struct vkd3d_shader_phase { - enum vkd3d_shader_opcode type; - unsigned int idx; - unsigned int instance_count; uint32_t function_id; - uint32_t instance_id; size_t function_location; }; @@ -2253,10 +2244,11 @@ struct spirv_compiler struct vkd3d_push_constant_buffer_binding *push_constants; const struct vkd3d_shader_spirv_target_info *spirv_target_info; + bool main_block_open; bool after_declarations_section; - const struct vkd3d_shader_signature *input_signature; - const struct vkd3d_shader_signature *output_signature; - const struct vkd3d_shader_signature 
*patch_constant_signature; + struct shader_signature input_signature; + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; const struct vkd3d_shader_transform_feedback_info *xfb_info; struct vkd3d_shader_output_info { @@ -2276,9 +2268,10 @@ struct spirv_compiler unsigned int output_control_point_count; bool use_vocp; - unsigned int shader_phase_count; - struct vkd3d_shader_phase *shader_phases; - size_t shader_phases_size; + enum vkd3d_shader_opcode phase; + bool emit_default_control_point_phase; + struct vkd3d_shader_phase control_point_phase; + struct vkd3d_shader_phase patch_constant_phase; uint32_t current_spec_constant_id; unsigned int spec_constant_count; @@ -2290,9 +2283,19 @@ struct spirv_compiler struct vkd3d_string_buffer_cache string_buffers; }; -static bool is_control_point_phase(const struct vkd3d_shader_phase *phase) +static bool is_in_default_phase(const struct spirv_compiler *compiler) +{ + return compiler->phase == VKD3DSIH_INVALID; +} + +static bool is_in_control_point_phase(const struct spirv_compiler *compiler) +{ + return compiler->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; +} + +static bool is_in_fork_or_join_phase(const struct spirv_compiler *compiler) { - return phase && phase->type == VKD3DSIH_HS_CONTROL_POINT_PHASE; + return compiler->phase == VKD3DSIH_HS_FORK_PHASE || compiler->phase == VKD3DSIH_HS_JOIN_PHASE; } static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler); @@ -2304,13 +2307,37 @@ static const char *spirv_compiler_get_entry_point_name(const struct spirv_compil return info && info->entry_point ? 
info->entry_point : "main"; } -struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, - const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, +static void spirv_compiler_destroy(struct spirv_compiler *compiler) +{ + vkd3d_free(compiler->control_flow_info); + + vkd3d_free(compiler->output_info); + + vkd3d_free(compiler->push_constants); + vkd3d_free(compiler->descriptor_offset_ids); + + vkd3d_spirv_builder_free(&compiler->spirv_builder); + + rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL); + + vkd3d_free(compiler->spec_constants); + + vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); + + shader_signature_cleanup(&compiler->input_signature); + shader_signature_cleanup(&compiler->output_signature); + shader_signature_cleanup(&compiler->patch_constant_signature); + + vkd3d_free(compiler); +} + +static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, + struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) { - const struct vkd3d_shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; - const struct vkd3d_shader_signature *output_signature = &shader_desc->output_signature; + const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; + const struct shader_signature *output_signature = &shader_desc->output_signature; const struct vkd3d_shader_interface_info *shader_interface; const struct vkd3d_shader_descriptor_offset_info *offset_info; const struct vkd3d_shader_spirv_target_info *target_info; @@ -2402,9 +2429,12 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * compiler->shader_type = shader_version->type; - 
compiler->input_signature = &shader_desc->input_signature; - compiler->output_signature = &shader_desc->output_signature; - compiler->patch_constant_signature = &shader_desc->patch_constant_signature; + compiler->input_signature = shader_desc->input_signature; + compiler->output_signature = shader_desc->output_signature; + compiler->patch_constant_signature = shader_desc->patch_constant_signature; + memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); + memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); + memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) { @@ -2437,6 +2467,8 @@ struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version * compiler->scan_descriptor_info = scan_descriptor_info; + compiler->phase = VKD3DSIH_INVALID; + vkd3d_string_buffer_cache_init(&compiler->string_buffers); spirv_compiler_emit_initial_declarations(compiler); @@ -2857,7 +2889,7 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s { unsigned int idx; - idx = reg->idx[1].offset != ~0u ? reg->idx[1].offset : reg->idx[0].offset; + idx = reg->idx_count ? 
reg->idx[reg->idx_count - 1].offset : 0; switch (reg->type) { case VKD3DSPR_RESOURCE: @@ -2887,12 +2919,6 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s case VKD3DSPR_DEPTHOUTLE: snprintf(buffer, buffer_size, "oDepth"); break; - case VKD3DSPR_FORKINSTID: - snprintf(buffer, buffer_size, "vForkInstanceId"); - break; - case VKD3DSPR_JOININSTID: - snprintf(buffer, buffer_size, "vJoinInstanceId"); - break; case VKD3DSPR_GSINSTID: snprintf(buffer, buffer_size, "vGSInstanceID"); break; @@ -2965,18 +2991,26 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler, struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, - enum vkd3d_shader_component_type component_type, unsigned int component_count, unsigned int array_length) + enum vkd3d_shader_component_type component_type, unsigned int component_count, + const unsigned int *array_lengths, unsigned int length_count) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id, length_id, ptr_type_id; + unsigned int i; - if (!array_length) + if (!length_count) return spirv_compiler_emit_variable(compiler, stream, storage_class, component_type, component_count); type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - length_id = spirv_compiler_get_constant_uint(compiler, array_length); - type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + for (i = 0; i < length_count; ++i) + { + if (!array_lengths[i]) + continue; + length_id = spirv_compiler_get_constant_uint(compiler, array_lengths[i]); + type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + } + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); } @@ -3169,7 +3203,6 @@ struct vkd3d_shader_register_info unsigned int 
structure_stride; unsigned int binding_base_idx; bool is_aggregate; - bool is_dynamically_indexed; }; static bool spirv_compiler_get_register_info(const struct spirv_compiler *compiler, @@ -3192,7 +3225,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil register_info->structure_stride = 0; register_info->binding_base_idx = 0; register_info->is_aggregate = false; - register_info->is_dynamically_indexed = false; return true; } @@ -3214,7 +3246,6 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil register_info->structure_stride = symbol->info.reg.structure_stride; register_info->binding_base_idx = symbol->info.reg.binding_base_idx; register_info->is_aggregate = symbol->info.reg.is_aggregate; - register_info->is_dynamically_indexed = symbol->info.reg.is_dynamically_indexed; return true; } @@ -3344,41 +3375,22 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp } else if (register_info->is_aggregate) { - if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_INCONTROLPOINT) - { - /* Indices for these are swapped compared to the generated SPIR-V. 
*/ - if (reg->idx[1].offset != ~0u) - indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[1]); - if (reg->idx[0].offset != ~0u) - indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]); - } - else - { - struct vkd3d_shader_register_index reg_idx = reg->idx[0]; - - if (reg->idx[1].rel_addr) - FIXME("Relative addressing not implemented.\n"); - - if (register_info->is_dynamically_indexed) - { - indexes[index_count++] = vkd3d_spirv_build_op_load(builder, - vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, 1), - register_info->member_idx, SpvMemoryAccessMaskNone); - } - else - { - reg_idx.offset = register_info->member_idx; - indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg_idx); - } - } + /* Indices for these are swapped compared to the generated SPIR-V. */ + if (reg->idx_count > 2) + indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[1]); + if (reg->idx_count > 1) + indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]); + if (!index_count) + /* A register sysval which is an array in SPIR-V, e.g. SAMPLEMASK. */ + indexes[index_count++] = spirv_compiler_get_constant_uint(compiler, 0); } else { - if (reg->idx[1].rel_addr || (reg->idx[1].offset == ~0u && reg->idx[0].rel_addr)) + if (reg->idx_count && reg->idx[reg->idx_count - 1].rel_addr) FIXME("Relative addressing not implemented.\n"); /* Handle arrayed registers, e.g. v[3][0]. 
*/ - if (reg->idx[1].offset != ~0u && !register_is_descriptor(reg)) + if (reg->idx_count > 1 && !register_is_descriptor(reg)) indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]); } @@ -4249,35 +4261,12 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp if ((builtin = get_spirv_builtin_for_register(reg_type))) return builtin; - if (sysval != VKD3D_SIV_NONE || (reg_type != VKD3DSPR_OUTPUT && reg_type != VKD3DSPR_COLOROUT)) + if (sysval != VKD3D_SIV_NONE || (reg_type != VKD3DSPR_OUTPUT && reg_type != VKD3DSPR_COLOROUT + && reg_type != VKD3DSPR_PATCHCONST)) FIXME("Unhandled builtin (register type %#x, sysval %#x).\n", reg_type, sysval); return NULL; } -static const struct vkd3d_shader_signature_element *vkd3d_find_signature_element_for_reg( - const struct vkd3d_shader_signature *signature, unsigned int *signature_element_index, - unsigned int reg_idx, DWORD write_mask) -{ - unsigned int signature_idx; - - for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) - { - if (signature->elements[signature_idx].register_index == reg_idx - && (signature->elements[signature_idx].mask & write_mask) == write_mask) - { - if (signature_element_index) - *signature_element_index = signature_idx; - return &signature->elements[signature_idx]; - } - } - - FIXME("Could not find shader signature element (register %u, write mask %#x).\n", - reg_idx, write_mask); - if (signature_element_index) - *signature_element_index = ~0u; - return NULL; -} - static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler) { struct vkd3d_shader_register r; @@ -4288,6 +4277,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler r.type = VKD3DSPR_OUTPOINTID; r.idx[0].offset = ~0u; r.idx[1].offset = ~0u; + r.idx_count = 0; return spirv_compiler_get_register_id(compiler, &r); } @@ -4302,7 +4292,7 @@ static uint32_t spirv_compiler_emit_load_invocation_id(struct 
spirv_compiler *co } static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compiler, - uint32_t id, const struct vkd3d_shader_phase *phase, const char *suffix) + uint32_t id, const char *suffix) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const char *name; @@ -4310,7 +4300,7 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile if (!suffix) suffix = ""; - switch (phase->type) + switch (compiler->phase) { case VKD3DSIH_HS_CONTROL_POINT_PHASE: name = "control"; @@ -4322,62 +4312,23 @@ static void spirv_compiler_emit_shader_phase_name(struct spirv_compiler *compile name = "join"; break; default: - ERR("Invalid phase type %#x.\n", phase->type); + ERR("Invalid phase type %#x.\n", compiler->phase); return; } - vkd3d_spirv_build_op_name(builder, id, "%s%u%s", name, phase->idx, suffix); -} - -static void spirv_compiler_begin_shader_phase(struct spirv_compiler *compiler, - struct vkd3d_shader_phase *phase) -{ - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t void_id, function_type_id; - unsigned int param_count; - uint32_t param_type_id; - - if (phase->instance_count) - { - param_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - param_count = 1; - } - else - { - param_count = 0; - } - - phase->function_id = vkd3d_spirv_alloc_id(builder); - - void_id = vkd3d_spirv_get_op_type_void(builder); - function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, &param_type_id, param_count); - vkd3d_spirv_build_op_function(builder, void_id, phase->function_id, - SpvFunctionControlMaskNone, function_type_id); - - if (phase->instance_count) - phase->instance_id = vkd3d_spirv_build_op_function_parameter(builder, param_type_id); - - vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); - phase->function_location = vkd3d_spirv_stream_current_location(&builder->function_stream); - - spirv_compiler_emit_shader_phase_name(compiler, 
phase->function_id, phase, NULL); + vkd3d_spirv_build_op_name(builder, id, "%s%s", name, suffix); } static const struct vkd3d_shader_phase *spirv_compiler_get_current_shader_phase( struct spirv_compiler *compiler) { - struct vkd3d_shader_phase *phase; - - if (!compiler->shader_phase_count) + if (is_in_default_phase(compiler)) return NULL; - phase = &compiler->shader_phases[compiler->shader_phase_count - 1]; - if (!phase->function_id) - spirv_compiler_begin_shader_phase(compiler, phase); - return phase; + return is_in_control_point_phase(compiler) ? &compiler->control_point_phase : &compiler->patch_constant_phase; } static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler, - uint32_t id, unsigned int component_count, const struct vkd3d_shader_signature_element *signature_element) + uint32_t id, unsigned int component_count, const struct signature_element *signature_element) { const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; const struct vkd3d_shader_transform_feedback_element *xfb_element; @@ -4436,17 +4387,21 @@ static void spirv_compiler_decorate_xfb_output(struct spirv_compiler *compiler, vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationOffset, offset); } -static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *compiler, - const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, unsigned int array_size) +static uint32_t spirv_compiler_emit_builtin_variable_v(struct spirv_compiler *compiler, + const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, const unsigned int *array_sizes, + unsigned int size_count) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int sizes[2]; uint32_t id; - array_size = max(array_size, builtin->spirv_array_size); + assert(size_count <= ARRAY_SIZE(sizes)); + memcpy(sizes, array_sizes, size_count * sizeof(sizes[0])); + array_sizes = sizes; + sizes[0] = max(sizes[0], builtin->spirv_array_size); - 
id = spirv_compiler_emit_array_variable(compiler, - &builder->global_stream, storage_class, - builtin->component_type, builtin->component_count, array_size); + id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, storage_class, + builtin->component_type, builtin->component_count, array_sizes, size_count); vkd3d_spirv_add_iface_variable(builder, id); spirv_compiler_decorate_builtin(compiler, id, builtin->spirv_builtin); @@ -4458,54 +4413,45 @@ static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *comp return id; } -static bool needs_private_io_variable(const struct vkd3d_shader_signature *signature, - unsigned int reg_idx, const struct vkd3d_spirv_builtin *builtin, - unsigned int *component_count, unsigned int *out_write_mask) +static uint32_t spirv_compiler_emit_builtin_variable(struct spirv_compiler *compiler, + const struct vkd3d_spirv_builtin *builtin, SpvStorageClass storage_class, unsigned int array_size) { - unsigned int write_mask = 0; - bool have_sysval = false; - unsigned int i, count; - - /* Always use private variables for arrayed builtins. These are generally - * scalars on the D3D side, so would need extra array indices when - * accessing them. It may be feasible to insert those indices at the point - * where the builtins are used, but it's not clear it's worth the effort. 
*/ - if (builtin && (builtin->spirv_array_size || builtin->fixup_pfn)) - return true; - - if (*component_count == VKD3D_VEC4_SIZE) - return false; - - for (i = 0, count = 0; i < signature->element_count; ++i) - { - const struct vkd3d_shader_signature_element *current = &signature->elements[i]; + return spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, &array_size, 1); +} - if (current->register_index != reg_idx) - continue; +static bool needs_private_io_variable(const struct vkd3d_spirv_builtin *builtin) +{ + return builtin && builtin->fixup_pfn; +} - write_mask |= current->mask; - ++count; +static unsigned int shader_signature_next_location(const struct shader_signature *signature) +{ + unsigned int i, max_row; - if (current->sysval_semantic) - have_sysval = true; - } + if (!signature) + return 0; - if (count == 1) - return false; + for (i = 0, max_row = 0; i < signature->element_count; ++i) + max_row = max(max_row, signature->elements[i].register_index + signature->elements[i].register_count); + return max_row; +} - if (builtin || have_sysval) - return true; +static unsigned int shader_register_get_io_indices(const struct vkd3d_shader_register *reg, + unsigned int *array_sizes) +{ + unsigned int i, element_idx; - if (!vkd3d_bitmask_is_contiguous(write_mask)) + array_sizes[0] = 0; + array_sizes[1] = 0; + element_idx = reg->idx[0].offset; + for (i = 1; i < reg->idx_count; ++i) { - FIXME("Write mask %#x is non-contiguous.\n", write_mask); - return true; + array_sizes[1] = array_sizes[0]; + array_sizes[0] = element_idx; + element_idx = reg->idx[i].offset; } - assert(vkd3d_write_mask_component_count(write_mask) >= *component_count); - *component_count = vkd3d_write_mask_component_count(write_mask); - *out_write_mask = write_mask; - return false; + return element_idx; } static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, @@ -4513,50 +4459,35 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, enum 
vkd3d_shader_interpolation_mode interpolation_mode) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_signature_element *signature_element; - const struct vkd3d_shader_signature *shader_signature; const struct vkd3d_shader_register *reg = &dst->reg; unsigned int component_idx, input_component_count; + const struct signature_element *signature_element; + const struct shader_signature *shader_signature; enum vkd3d_shader_component_type component_type; uint32_t type_id, ptr_type_id, float_type_id; const struct vkd3d_spirv_builtin *builtin; + unsigned int write_mask, reg_write_mask; struct vkd3d_symbol *symbol = NULL; uint32_t val_id, input_id, var_id; struct vkd3d_symbol reg_symbol; - struct vkd3d_symbol tmp_symbol; SpvStorageClass storage_class; struct rb_entry *entry = NULL; bool use_private_var = false; - unsigned int write_mask; - unsigned int array_size; - unsigned int reg_idx; + unsigned int array_sizes[2]; + unsigned int element_idx; uint32_t i, index; - assert(!reg->idx[0].rel_addr); - assert(!reg->idx[1].rel_addr); - - if (reg->idx[1].offset != ~0u) - { - array_size = reg->idx[0].offset; - reg_idx = reg->idx[1].offset; - } - else - { - array_size = 0; - reg_idx = reg->idx[0].offset; - } + assert(!reg->idx_count || !reg->idx[0].rel_addr); + assert(reg->idx_count < 2 || !reg->idx[1].rel_addr); shader_signature = reg->type == VKD3DSPR_PATCHCONST - ? compiler->patch_constant_signature : compiler->input_signature; + ? 
&compiler->patch_constant_signature : &compiler->input_signature; - if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature, - NULL, reg_idx, dst->write_mask))) - { - FIXME("No signature element for shader input, ignoring shader input.\n"); - return 0; - } + element_idx = shader_register_get_io_indices(reg, array_sizes); + signature_element = &shader_signature->elements[element_idx]; - if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && !sysval && signature_element->sysval_semantic) + if ((compiler->shader_type == VKD3D_SHADER_TYPE_HULL || compiler->shader_type == VKD3D_SHADER_TYPE_GEOMETRY) + && !sysval && signature_element->sysval_semantic) sysval = vkd3d_siv_from_sysval(signature_element->sysval_semantic); builtin = get_spirv_builtin_for_sysval(compiler, sysval); @@ -4576,12 +4507,16 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, component_idx = vkd3d_write_mask_get_component_idx(signature_element->mask); } - if (needs_private_io_variable(shader_signature, reg_idx, builtin, &input_component_count, &write_mask) - && (compiler->shader_type != VKD3D_SHADER_TYPE_HULL - || (reg->type != VKD3DSPR_INCONTROLPOINT && reg->type != VKD3DSPR_PATCHCONST))) + if (needs_private_io_variable(builtin)) + { use_private_var = true; + reg_write_mask = write_mask; + } else + { component_idx = vkd3d_write_mask_get_component_idx(write_mask); + reg_write_mask = write_mask >> component_idx; + } storage_class = SpvStorageClassInput; @@ -4589,111 +4524,68 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, if ((entry = rb_get(&compiler->symbol_table, &reg_symbol))) { + /* Except for vicp there should be one declaration per signature element. Sources of + * duplicate declarations are: a single register split into multiple declarations having + * different components, which should have been merged, and declarations in one phase + * being repeated in another (i.e. vcp/vocp), which should have been deleted.
*/ + if (reg->type != VKD3DSPR_INPUT || !is_in_fork_or_join_phase(compiler)) + FIXME("Duplicate input definition found.\n"); symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - input_id = symbol->id; - } - else if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL - && (reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST)) - { - /* Input/output registers from one phase can be used as inputs in - * subsequent phases. Specifically: - * - * - Control phase inputs are available as "vicp" in fork and join - * phases. - * - Control phase outputs are available as "vocp" in fork and join - * phases. - * - Fork phase patch constants are available as "vpc" in join - * phases. - * - * We handle "vicp" and "vpc" here by creating aliases to the shader's - * global inputs and outputs. We handle "vocp" in - * spirv_compiler_leave_shader_phase(). */ - - tmp_symbol = reg_symbol; - if (reg->type == VKD3DSPR_PATCHCONST) - tmp_symbol.key.reg.type = VKD3DSPR_OUTPUT; - else - tmp_symbol.key.reg.type = VKD3DSPR_INPUT; - - if ((entry = rb_get(&compiler->symbol_table, &tmp_symbol))) - { - symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - tmp_symbol = *symbol; - tmp_symbol.key.reg.type = reg->type; - spirv_compiler_put_symbol(compiler, &tmp_symbol); - - input_id = symbol->id; - } - else - { - if (reg->type == VKD3DSPR_PATCHCONST) - ERR("Patch constant register %u was not declared in a previous phase.\n", reg_idx); - else - ERR("Input control point register %u was not declared in a previous phase.\n", reg_idx); - } + return symbol->id; } - if (!symbol || ~symbol->info.reg.dcl_mask & write_mask) + if (builtin) { - if (builtin) - { - input_id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size); - if (reg->type == VKD3DSPR_PATCHCONST) - vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); - } - else - { - unsigned int location = reg_idx; - - input_id = spirv_compiler_emit_array_variable(compiler, 
&builder->global_stream, - storage_class, component_type, input_component_count, array_size); - vkd3d_spirv_add_iface_variable(builder, input_id); - if (reg->type == VKD3DSPR_PATCHCONST) - { - vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); - location += compiler->input_signature->element_count; - } - vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location); - if (component_idx) - vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationComponent, component_idx); - - spirv_compiler_emit_interpolation_decorations(compiler, input_id, interpolation_mode); - } + input_id = spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, array_sizes, 2); + if (reg->type == VKD3DSPR_PATCHCONST) + vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); } - - if (!symbol) + else { - var_id = input_id; - if (use_private_var) + unsigned int location = signature_element->register_index; + + input_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, + storage_class, component_type, input_component_count, array_sizes, 2); + vkd3d_spirv_add_iface_variable(builder, input_id); + if (reg->type == VKD3DSPR_PATCHCONST) { - storage_class = SpvStorageClassPrivate; - var_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, array_size); + vkd3d_spirv_build_op_decorate(builder, input_id, SpvDecorationPatch, NULL, 0); + location += shader_signature_next_location(&compiler->input_signature); } + vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationLocation, location); + if (component_idx) + vkd3d_spirv_build_op_decorate1(builder, input_id, SpvDecorationComponent, component_idx); - vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, - use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, - use_private_var ?
VKD3DSP_WRITEMASK_ALL : write_mask); - reg_symbol.info.reg.dcl_mask |= write_mask; - spirv_compiler_put_symbol(compiler, &reg_symbol); - - spirv_compiler_emit_register_debug_name(builder, var_id, reg); + spirv_compiler_emit_interpolation_decorations(compiler, input_id, interpolation_mode); } - else + + var_id = input_id; + if (use_private_var) { - symbol->info.reg.dcl_mask |= write_mask; + storage_class = SpvStorageClassPrivate; + var_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, + storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, array_sizes, 2); } + vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, + use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, + use_private_var ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); + reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; + assert(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); + spirv_compiler_put_symbol(compiler, &reg_symbol); + + spirv_compiler_emit_register_debug_name(builder, var_id, reg); + if (use_private_var) { type_id = vkd3d_spirv_get_type_id(builder, component_type, input_component_count); - for (i = 0; i < max(array_size, 1); ++i) + for (i = 0; i < max(array_sizes[0], 1); ++i) { struct vkd3d_shader_register dst_reg = *reg; dst_reg.data_type = VKD3D_DATA_FLOAT; val_id = input_id; - if (array_size) + if (array_sizes[0]) { ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); index = spirv_compiler_get_constant_uint(compiler, i); @@ -4708,7 +4600,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, type_id); index = spirv_compiler_get_constant_uint(compiler, builtin->member_idx); val_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, ptr_type_id, input_id, index); - dst_reg.idx[0].offset = reg_idx + i; + dst_reg.idx[0].offset = element_idx + i;
} val_id = vkd3d_spirv_build_op_load(builder, type_id, val_id, SpvMemoryAccessMaskNone); @@ -4743,9 +4635,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, uint32_t write_mask; uint32_t input_id; - assert(!reg->idx[0].rel_addr); - assert(!reg->idx[1].rel_addr); - assert(reg->idx[1].offset == ~0u); + assert(!reg->idx_count || !reg->idx[0].rel_addr); + assert(reg->idx_count < 2); if (!(builtin = get_spirv_builtin_for_register(reg->type))) { @@ -4763,19 +4654,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); vkd3d_symbol_set_register_info(&reg_symbol, input_id, SpvStorageClassInput, builtin->component_type, write_mask); - reg_symbol.info.reg.dcl_mask = write_mask; reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; spirv_compiler_put_symbol(compiler, &reg_symbol); spirv_compiler_emit_register_debug_name(builder, input_id, reg); } static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compiler, - const struct vkd3d_shader_phase *phase, const struct vkd3d_shader_dst_param *dst) + const struct vkd3d_shader_dst_param *dst) { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_register *reg = &dst->reg; - struct vkd3d_symbol reg_symbol; - uint32_t val_id; switch (reg->type) { @@ -4787,10 +4674,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil case VKD3DSPR_PRIMID: spirv_compiler_emit_input_register(compiler, dst); return; - case VKD3DSPR_FORKINSTID: - case VKD3DSPR_JOININSTID: - val_id = phase->instance_id; - break; case VKD3DSPR_OUTPOINTID: /* Emitted in spirv_compiler_emit_initial_declarations(). */ case VKD3DSPR_OUTCONTROLPOINT: /* See spirv_compiler_leave_shader_phase().
*/ return; @@ -4798,22 +4681,6 @@ static void spirv_compiler_emit_shader_phase_input(struct spirv_compiler *compil FIXME("Unhandled shader phase input register %#x.\n", reg->type); return; } - - vkd3d_symbol_make_register(&reg_symbol, reg); - vkd3d_symbol_set_register_info(&reg_symbol, val_id, - SpvStorageClassMax /* Intermediate value */, - VKD3D_SHADER_COMPONENT_UINT, VKD3DSP_WRITEMASK_0); - spirv_compiler_put_symbol(compiler, &reg_symbol); - spirv_compiler_emit_register_debug_name(builder, val_id, reg); -} - -static unsigned int spirv_compiler_get_output_variable_index( - struct spirv_compiler *compiler, unsigned int register_idx) -{ - if (register_idx == ~0u) /* oDepth */ - return ARRAY_SIZE(compiler->private_output_variable) - 1; - assert(register_idx < ARRAY_SIZE(compiler->private_output_variable) - 1); - return register_idx; } static unsigned int get_shader_output_swizzle(const struct spirv_compiler *compiler, @@ -4835,8 +4702,7 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && info && info->dual_source_blending; } -static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signature_element *e, - uint32_t *mask) +static void calculate_clip_or_cull_distance_mask(const struct signature_element *e, uint32_t *mask) { if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) { @@ -4847,38 +4713,10 @@ static void calculate_clip_or_cull_distance_mask(const struct vkd3d_shader_signa *mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); } -static uint32_t calculate_sysval_array_mask(struct spirv_compiler *compiler, - const struct vkd3d_shader_signature *signature, enum vkd3d_shader_input_sysval_semantic sysval) -{ - const struct vkd3d_shader_signature_element *e; - const struct vkd3d_spirv_builtin *sig_builtin; - const struct vkd3d_spirv_builtin *builtin; - uint32_t signature_idx, mask = 0; - - if (!(builtin =
get_spirv_builtin_for_sysval(compiler, sysval))) - { - FIXME("Unhandled sysval %#x.\n", sysval); - return 0; - } - - for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) - { - e = &signature->elements[signature_idx]; - - sig_builtin = get_spirv_builtin_for_sysval(compiler, - vkd3d_siv_from_sysval_indexed(e->sysval_semantic, e->semantic_index)); - - if (sig_builtin && sig_builtin->spirv_builtin == builtin->spirv_builtin) - mask |= (e->mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * sig_builtin->member_idx); - } - - return mask; -} - /* Emits arrayed SPIR-V built-in variables. */ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) { - const struct vkd3d_shader_signature *output_signature = compiler->output_signature; + const struct shader_signature *output_signature = &compiler->output_signature; uint32_t clip_distance_mask = 0, clip_distance_id = 0; uint32_t cull_distance_mask = 0, cull_distance_id = 0; const struct vkd3d_spirv_builtin *builtin; @@ -4886,7 +4724,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * for (i = 0; i < output_signature->element_count; ++i) { - const struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; + const struct signature_element *e = &output_signature->elements[i]; switch (e->sysval_semantic) { @@ -4921,7 +4759,7 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * for (i = 0; i < output_signature->element_count; ++i) { - const struct vkd3d_shader_signature_element *e = &output_signature->elements[i]; + const struct signature_element *e = &output_signature->elements[i]; switch (e->sysval_semantic) { @@ -4953,9 +4791,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, uint32_t write_mask; uint32_t output_id; - assert(!reg->idx[0].rel_addr); - assert(!reg->idx[1].rel_addr); - assert(reg->idx[1].offset == ~0u); + assert(!reg->idx_count || 
!reg->idx[0].rel_addr); + assert(reg->idx_count < 2); if (!(builtin = get_spirv_builtin_for_register(reg->type))) { @@ -4969,7 +4806,6 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); vkd3d_symbol_set_register_info(&reg_symbol, output_id, SpvStorageClassOutput, builtin->component_type, write_mask); - reg_symbol.info.reg.dcl_mask = write_mask; reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; spirv_compiler_put_symbol(compiler, &reg_symbol); spirv_compiler_emit_register_execution_mode(compiler, reg); @@ -4977,7 +4813,7 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, } static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler, - const struct vkd3d_shader_phase *phase, const struct vkd3d_spirv_builtin *builtin) + const struct vkd3d_spirv_builtin *builtin) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t *variable_id, id; @@ -4993,7 +4829,7 @@ static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_c return *variable_id; id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0); - if (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE) + if (is_in_fork_or_join_phase(compiler)) vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); if (variable_id) @@ -5005,44 +4841,34 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_input_sysval_semantic sysval) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_signature_element *signature_element; - const struct vkd3d_shader_signature *shader_signature; const struct vkd3d_shader_register *reg = &dst->reg; unsigned int component_idx, output_component_count; + const struct signature_element
*signature_element; enum vkd3d_shader_component_type component_type; + const struct shader_signature *shader_signature; const struct vkd3d_spirv_builtin *builtin; - const struct vkd3d_shader_phase *phase; - struct vkd3d_symbol *symbol = NULL; + unsigned int write_mask, reg_write_mask; bool use_private_variable = false; struct vkd3d_symbol reg_symbol; SpvStorageClass storage_class; - struct rb_entry *entry = NULL; - unsigned int signature_idx; - unsigned int write_mask; - unsigned int array_size; + unsigned int array_sizes[2]; + unsigned int element_idx; bool is_patch_constant; uint32_t id, var_id; - phase = spirv_compiler_get_current_shader_phase(compiler); - is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE); + is_patch_constant = is_in_fork_or_join_phase(compiler); - shader_signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; + shader_signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature; - array_size = is_control_point_phase(phase) ? 
compiler->output_control_point_count : 0; - - if (!(signature_element = vkd3d_find_signature_element_for_reg(shader_signature, - &signature_idx, reg->idx[0].offset, dst->write_mask))) - { - FIXME("No signature element for shader output, ignoring shader output.\n"); - return; - } + element_idx = shader_register_get_io_indices(reg, array_sizes); + signature_element = &shader_signature->elements[element_idx]; builtin = vkd3d_get_spirv_builtin(compiler, dst->reg.type, sysval); write_mask = signature_element->mask; - component_idx = vkd3d_write_mask_get_component_idx(dst->write_mask); - output_component_count = vkd3d_write_mask_component_count(signature_element->mask); + component_idx = vkd3d_write_mask_get_component_idx(write_mask); + output_component_count = vkd3d_write_mask_component_count(write_mask); if (builtin) { component_type = builtin->component_type; @@ -5058,128 +4884,103 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, storage_class = SpvStorageClassOutput; if (get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE - || needs_private_io_variable(shader_signature, signature_element->register_index, - builtin, &output_component_count, &write_mask) - || is_patch_constant) + || (compiler->output_info[element_idx].id && compiler->output_info[element_idx].array_element_mask) + || needs_private_io_variable(builtin)) + { use_private_variable = true; + reg_write_mask = write_mask; + } else + { component_idx = vkd3d_write_mask_get_component_idx(write_mask); + reg_write_mask = write_mask >> component_idx; + } vkd3d_symbol_make_register(&reg_symbol, reg); - if ((entry = rb_get(&compiler->symbol_table, &reg_symbol))) + if (rb_get(&compiler->symbol_table, &reg_symbol)) { - symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - id = symbol->id; + /* See spirv_compiler_emit_input() for possible causes.
*/ + FIXME("Duplicate output definition found.\n"); + return; } - if (!symbol || ~symbol->info.reg.dcl_mask & write_mask) + if (compiler->output_info[element_idx].id) { - if (compiler->output_info[signature_idx].id) - { - id = compiler->output_info[signature_idx].id; - if (compiler->output_info[signature_idx].array_element_mask) - use_private_variable = true; - } - else if (builtin) - { - if (phase) - id = spirv_compiler_emit_shader_phase_builtin_variable(compiler, phase, builtin); - else - id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size); - - if (builtin->spirv_array_size) - compiler->output_info[signature_idx].array_element_mask = - calculate_sysval_array_mask(compiler, shader_signature, sysval); - - spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); - } + id = compiler->output_info[element_idx].id; + } + else if (builtin) + { + if (spirv_compiler_get_current_shader_phase(compiler)) + id = spirv_compiler_emit_shader_phase_builtin_variable(compiler, builtin); else - { - unsigned int location = reg->idx[0].offset; - - if (is_patch_constant) - location += compiler->output_signature->element_count; - - id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - storage_class, component_type, output_component_count, array_size); - vkd3d_spirv_add_iface_variable(builder, id); - - if (is_dual_source_blending(compiler) && reg->idx[0].offset < 2) - { - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, reg->idx[0].offset); - } - else - { - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, location); - } - - if (component_idx) - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationComponent, component_idx); - } + id = spirv_compiler_emit_builtin_variable_v(compiler, builtin, storage_class, array_sizes, 2); - if (is_patch_constant) - vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, 
NULL, 0); - - spirv_compiler_decorate_xfb_output(compiler, id, output_component_count, signature_element); - - compiler->output_info[signature_idx].id = id; - compiler->output_info[signature_idx].component_type = component_type; + spirv_compiler_emit_register_execution_mode(compiler, &dst->reg); } - - if (!symbol) + else { - var_id = id; - if (use_private_variable) - storage_class = SpvStorageClassPrivate; + unsigned int location = signature_element->register_index; + if (is_patch_constant) - var_id = compiler->hs.patch_constants_id; - else if (use_private_variable) - var_id = spirv_compiler_emit_variable(compiler, &builder->global_stream, - storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + location += shader_signature_next_location(&compiler->output_signature); - vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, - use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, - use_private_variable ? VKD3DSP_WRITEMASK_ALL : write_mask); - reg_symbol.info.reg.is_aggregate = use_private_variable ?
is_patch_constant : array_size; - if (!use_private_variable && is_control_point_phase(phase)) + id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, + storage_class, component_type, output_component_count, array_sizes, 2); + vkd3d_spirv_add_iface_variable(builder, id); + + if (is_dual_source_blending(compiler) && signature_element->register_index < 2) { - reg_symbol.info.reg.member_idx = spirv_compiler_get_invocation_id(compiler); - reg_symbol.info.reg.is_dynamically_indexed = true; + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, 0); + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationIndex, signature_element->register_index); } - else if (is_patch_constant) + else { - reg_symbol.info.reg.member_idx = reg->idx[0].offset; + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationLocation, location); } - reg_symbol.info.reg.dcl_mask = write_mask; - - spirv_compiler_put_symbol(compiler, &reg_symbol); - if (!is_patch_constant) - spirv_compiler_emit_register_debug_name(builder, var_id, reg); + if (component_idx) + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationComponent, component_idx); } - else + + if (is_patch_constant) + vkd3d_spirv_build_op_decorate(builder, id, SpvDecorationPatch, NULL, 0); + + spirv_compiler_decorate_xfb_output(compiler, id, output_component_count, signature_element); + + compiler->output_info[element_idx].id = id; + compiler->output_info[element_idx].component_type = component_type; + + var_id = id; + if (use_private_variable) { - symbol->info.reg.dcl_mask |= write_mask; - var_id = symbol->id; + storage_class = SpvStorageClassPrivate; + var_id = spirv_compiler_emit_variable(compiler, &builder->global_stream, + storage_class, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); } + vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, + use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, + use_private_variable ?
VKD3DSP_WRITEMASK_ALL : reg_write_mask); + reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; + assert(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); + + spirv_compiler_put_symbol(compiler, &reg_symbol); + + if (!is_patch_constant) + spirv_compiler_emit_register_debug_name(builder, var_id, reg); + if (use_private_variable) { - unsigned int idx = spirv_compiler_get_output_variable_index(compiler, reg->idx[0].offset); - compiler->private_output_variable[idx] = var_id; - compiler->private_output_variable_write_mask[idx] |= dst->write_mask; - if (is_patch_constant) - compiler->private_output_variable_array_idx[idx] = spirv_compiler_get_constant_uint( - compiler, reg->idx[0].offset); + compiler->private_output_variable[element_idx] = var_id; + compiler->private_output_variable_write_mask[element_idx] |= reg_write_mask; if (!compiler->epilogue_function_id) compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder); } } static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *compiler, - const struct vkd3d_shader_signature_element *e) + const struct signature_element *e) { enum vkd3d_shader_input_sysval_semantic sysval; const struct vkd3d_spirv_builtin *builtin; @@ -5198,14 +4999,14 @@ static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *com } static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compiler, - const struct vkd3d_shader_signature *signature, const struct vkd3d_shader_signature_element *output, + const struct shader_signature *signature, const struct signature_element *output, const struct vkd3d_shader_output_info *output_info, uint32_t output_index_id, uint32_t val_id, unsigned int write_mask) { unsigned int dst_write_mask, use_mask, uninit_mask, swizzle, mask; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id, zero_id, ptr_type_id, chain_id, object_id; - const struct
vkd3d_shader_signature_element *element; + const struct signature_element *element; unsigned int i, index, array_idx; uint32_t output_id; @@ -5224,6 +5025,9 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi use_mask |= element->used_mask; } } + index = vkd3d_write_mask_get_component_idx(output->mask); + dst_write_mask >>= index; + use_mask >>= index; write_mask &= dst_write_mask; if (!write_mask) @@ -5294,22 +5098,19 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * uint32_t param_type_id[MAX_REG_OUTPUT + 1], param_id[MAX_REG_OUTPUT + 1] = {0}; uint32_t void_id, type_id, ptr_type_id, function_type_id, function_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_signature *signature; - const struct vkd3d_shader_phase *phase; + const struct shader_signature *signature; uint32_t output_index_id = 0; bool is_patch_constant; unsigned int i, count; - DWORD variable_idx; STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(param_id)); STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(param_type_id)); STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(compiler->private_output_variable_array_idx)); STATIC_ASSERT(ARRAY_SIZE(compiler->private_output_variable) == ARRAY_SIZE(compiler->private_output_variable_write_mask)); - phase = spirv_compiler_get_current_shader_phase(compiler); - is_patch_constant = phase && (phase->type == VKD3DSIH_HS_FORK_PHASE || phase->type == VKD3DSIH_HS_JOIN_PHASE); + is_patch_constant = is_in_fork_or_join_phase(compiler); - signature = is_patch_constant ? compiler->patch_constant_signature : compiler->output_signature; + signature = is_patch_constant ? 
&compiler->patch_constant_signature : &compiler->output_signature; function_id = compiler->epilogue_function_id; @@ -5340,7 +5141,7 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * param_id[i] = vkd3d_spirv_build_op_load(builder, type_id, param_id[i], SpvMemoryAccessMaskNone); } - if (is_control_point_phase(phase)) + if (is_in_control_point_phase(compiler)) output_index_id = spirv_compiler_emit_load_invocation_id(compiler); for (i = 0; i < signature->element_count; ++i) @@ -5348,14 +5149,12 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * if (!compiler->output_info[i].id) continue; - variable_idx = spirv_compiler_get_output_variable_index(compiler, - signature->elements[i].register_index); - if (!param_id[variable_idx]) + if (!param_id[i]) continue; spirv_compiler_emit_store_shader_output(compiler, signature, &signature->elements[i], &compiler->output_info[i], output_index_id, - param_id[variable_idx], compiler->private_output_variable_write_mask[variable_idx]); + param_id[i], compiler->private_output_variable_write_mask[i]); } vkd3d_spirv_build_op_return(&compiler->spirv_builder); @@ -5375,28 +5174,11 @@ static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *comp dst.reg.type = VKD3DSPR_OUTPOINTID; dst.reg.idx[0].offset = ~0u; dst.reg.idx[1].offset = ~0u; + dst.reg.idx_count = 0; dst.write_mask = VKD3DSP_WRITEMASK_0; spirv_compiler_emit_input_register(compiler, &dst); } -static void spirv_compiler_emit_hull_shader_patch_constants(struct spirv_compiler *compiler) -{ - const struct vkd3d_shader_signature *signature = compiler->patch_constant_signature; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t register_count = 0; - unsigned int signature_idx; - - for (signature_idx = 0; signature_idx < signature->element_count; ++signature_idx) - register_count = max(register_count, signature->elements[signature_idx].register_index + 1); - - if 
(!register_count) - return; - - compiler->hs.patch_constants_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, register_count); - vkd3d_spirv_build_op_name(builder, compiler->hs.patch_constants_id, "opc"); -} - static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler) { const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; @@ -5410,7 +5192,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp case VKD3D_SHADER_TYPE_HULL: vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationControl); spirv_compiler_emit_hull_shader_builtins(compiler); - spirv_compiler_emit_hull_shader_patch_constants(compiler); break; case VKD3D_SHADER_TYPE_DOMAIN: vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation); @@ -5439,8 +5220,7 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) { vkd3d_spirv_builder_begin_main_function(builder); - - spirv_compiler_emit_shader_signature_outputs(compiler); + compiler->main_block_open = true; } } @@ -5522,12 +5302,13 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil reg.type = VKD3DSPR_IDXTEMP; reg.idx[0].offset = temp->register_idx; reg.idx[1].offset = ~0u; + reg.idx_count = 1; function_location = spirv_compiler_get_current_function_location(compiler); vkd3d_spirv_begin_function_stream_insertion(builder, function_location); id = spirv_compiler_emit_array_variable(compiler, &builder->function_stream, - SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, temp->register_size); + SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE, &temp->register_size, 1); spirv_compiler_emit_register_debug_name(builder, id, &reg); @@ -6097,6 +5878,7 @@ static void
spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_semantic *semantic = &instruction->declaration.semantic; + enum vkd3d_shader_resource_type resource_type = semantic->resource_type; uint32_t flags = instruction->flags; /* We don't distinguish between APPEND and COUNTER UAVs. */ @@ -6104,8 +5886,13 @@ static void spirv_compiler_emit_dcl_resource(struct spirv_compiler *compiler, if (flags) FIXME("Unhandled UAV flags %#x.\n", flags); + if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && semantic->sample_count == 1) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && semantic->sample_count == 1) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; + spirv_compiler_emit_resource_declaration(compiler, &semantic->resource, - semantic->resource_type, semantic->resource_data_type[0], 0, false); + resource_type, semantic->resource_data_type[0], 0, false); } static void spirv_compiler_emit_dcl_resource_raw(struct spirv_compiler *compiler, @@ -6185,10 +5972,9 @@ static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; - const struct vkd3d_shader_phase *phase; - if ((phase = spirv_compiler_get_current_shader_phase(compiler))) - spirv_compiler_emit_shader_phase_input(compiler, phase, dst); + if (spirv_compiler_get_current_shader_phase(compiler)) + spirv_compiler_emit_shader_phase_input(compiler, dst); else if (vkd3d_shader_register_is_input(&dst->reg) || dst->reg.type == VKD3DSPR_PATCHCONST) spirv_compiler_emit_input(compiler, dst, VKD3D_SIV_NONE, VKD3DSIM_NONE); else @@ -6224,7 +6010,8 @@ static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler, { const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; - if 
(vkd3d_shader_register_is_output(&dst->reg)) + if (vkd3d_shader_register_is_output(&dst->reg) + || (is_in_fork_or_join_phase(compiler) && vkd3d_shader_register_is_patch_constant(&dst->reg))) spirv_compiler_emit_output(compiler, dst, VKD3D_SIV_NONE); else spirv_compiler_emit_output_register(compiler, dst); @@ -6242,64 +6029,6 @@ static void spirv_compiler_emit_dcl_output_siv(struct spirv_compiler *compiler, spirv_compiler_emit_output(compiler, dst, sysval); } -static bool spirv_compiler_check_index_range(struct spirv_compiler *compiler, - const struct vkd3d_shader_index_range *range) -{ - const struct vkd3d_shader_register *reg = &range->dst.reg; - struct vkd3d_shader_register_info reg_info; - struct vkd3d_shader_register current_reg; - struct vkd3d_symbol reg_symbol; - unsigned int i; - uint32_t id; - - current_reg = *reg; - vkd3d_symbol_make_register(®_symbol, ¤t_reg); - if (!spirv_compiler_get_register_info(compiler, ¤t_reg, ®_info)) - { - ERR("Failed to get register info.\n"); - return false; - } - - /* FIXME: We should check if it's an array. 
*/ - if (!reg_info.is_aggregate) - { - FIXME("Unhandled register %#x.\n", reg->type); - return false; - } - id = reg_info.id; - - for (i = reg->idx[0].offset; i < reg->idx[0].offset + range->register_count; ++i) - { - current_reg.idx[0].offset = i; - vkd3d_symbol_make_register(®_symbol, ¤t_reg); - - if (range->dst.write_mask != reg_info.write_mask - || vkd3d_write_mask_component_count(reg_info.write_mask) != 1) - { - FIXME("Unhandled index range write mask %#x (%#x).\n", - range->dst.write_mask, reg_info.write_mask); - return false; - } - - if (reg_info.id != id) - { - FIXME("Unhandled index range %#x, %u.\n", reg->type, i); - return false; - } - } - - return true; -} - -static void spirv_compiler_emit_dcl_index_range(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - const struct vkd3d_shader_index_range *range = &instruction->declaration.index_range; - - if (!spirv_compiler_check_index_range(compiler, range)) - FIXME("Ignoring dcl_index_range %#x %u.\n", range->dst.reg.type, range->register_count); -} - static void spirv_compiler_emit_dcl_stream(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { @@ -6495,157 +6224,83 @@ static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); } -static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler, - const struct vkd3d_shader_phase *phase) +static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler); + +static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) { - const struct vkd3d_shader_signature *signature = compiler->output_signature; + const struct shader_signature *signature = &compiler->output_signature; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - struct vkd3d_symbol reg_symbol, *symbol; - struct vkd3d_shader_register reg; - struct rb_entry *entry; - unsigned 
int i; + + if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase) + spirv_compiler_emit_default_control_point_phase(compiler); vkd3d_spirv_build_op_function_end(builder); compiler->temp_id = 0; compiler->temp_count = 0; - /* - * vocp inputs in fork and join shader phases are outputs of the control - * point phase. Reinsert symbols for vocp registers while leaving the - * control point phase. - */ - if (is_control_point_phase(phase)) + if (is_in_control_point_phase(compiler)) { if (compiler->epilogue_function_id) { - spirv_compiler_emit_shader_phase_name(compiler, compiler->epilogue_function_id, phase, "_epilogue"); + spirv_compiler_emit_shader_phase_name(compiler, compiler->epilogue_function_id, "_epilogue"); spirv_compiler_emit_shader_epilogue_function(compiler); } - memset(®, 0, sizeof(reg)); - reg.idx[1].offset = ~0u; - /* Fork and join phases share output registers (patch constants). * Control point phase has separate output registers. */ memset(compiler->output_info, 0, signature->element_count * sizeof(*compiler->output_info)); memset(compiler->private_output_variable, 0, sizeof(compiler->private_output_variable)); memset(compiler->private_output_variable_array_idx, 0, sizeof(compiler->private_output_variable_array_idx)); memset(compiler->private_output_variable_write_mask, 0, sizeof(compiler->private_output_variable_write_mask)); - - for (i = 0; i < signature->element_count; ++i) - { - const struct vkd3d_shader_signature_element *e = &signature->elements[i]; - - reg.type = VKD3DSPR_OUTPUT; - reg.idx[0].offset = e->register_index; - vkd3d_symbol_make_register(®_symbol, ®); - if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) - { - rb_remove(&compiler->symbol_table, entry); - - symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - - reg.type = VKD3DSPR_OUTCONTROLPOINT; - reg.idx[1].offset = reg.idx[0].offset; - reg.idx[0].offset = compiler->output_control_point_count; - vkd3d_symbol_make_register(symbol, ®); - 
symbol->info.reg.is_aggregate = false; - - if (rb_put(&compiler->symbol_table, symbol, entry) == -1) - { - ERR("Failed to insert vocp symbol entry (%s).\n", debug_vkd3d_symbol(symbol)); - vkd3d_symbol_free(entry, NULL); - } - } - } - } - - if (phase->instance_count) - { - memset(®, 0, sizeof(reg)); - reg.type = phase->type == VKD3DSIH_HS_FORK_PHASE ? VKD3DSPR_FORKINSTID : VKD3DSPR_JOININSTID; - reg.idx[0].offset = ~0u; - reg.idx[1].offset = ~0u; - vkd3d_symbol_make_register(®_symbol, ®); - if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) - { - rb_remove(&compiler->symbol_table, entry); - vkd3d_symbol_free(entry, NULL); - } } } static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { - const struct vkd3d_shader_phase *previous_phase; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t function_id, void_id, function_type_id; struct vkd3d_shader_phase *phase; - if ((previous_phase = spirv_compiler_get_current_shader_phase(compiler))) - spirv_compiler_leave_shader_phase(compiler, previous_phase); + assert(compiler->phase != instruction->handler_idx); - if (!vkd3d_array_reserve((void **)&compiler->shader_phases, &compiler->shader_phases_size, - compiler->shader_phase_count + 1, sizeof(*compiler->shader_phases))) - return; - phase = &compiler->shader_phases[compiler->shader_phase_count]; + if (!is_in_default_phase(compiler)) + spirv_compiler_leave_shader_phase(compiler); - phase->type = instruction->handler_idx; - phase->idx = compiler->shader_phase_count; - phase->instance_count = 0; - phase->function_id = 0; - phase->instance_id = 0; - phase->function_location = 0; + function_id = vkd3d_spirv_alloc_id(builder); - ++compiler->shader_phase_count; -} - -static int spirv_compiler_emit_shader_phase_instance_count(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) -{ - struct vkd3d_shader_phase *phase = 
&compiler->shader_phases[compiler->shader_phase_count - 1]; - - if (!compiler->shader_phase_count - || (phase->type != VKD3DSIH_HS_FORK_PHASE && phase->type != VKD3DSIH_HS_JOIN_PHASE) - || phase->function_id) - { - WARN("Unexpected dcl_hs_{fork,join}_phase_instance_count instruction.\n"); - return VKD3D_ERROR_INVALID_SHADER; - } - - phase->instance_count = instruction->declaration.count; - - spirv_compiler_begin_shader_phase(compiler, phase); - - return VKD3D_OK; -} + void_id = vkd3d_spirv_get_op_type_void(builder); + function_type_id = vkd3d_spirv_get_op_type_function(builder, void_id, NULL, 0); + vkd3d_spirv_build_op_function(builder, void_id, function_id, + SpvFunctionControlMaskNone, function_type_id); -static const struct vkd3d_shader_phase *spirv_compiler_get_control_point_phase( - struct spirv_compiler *compiler) -{ - const struct vkd3d_shader_phase *phase; + vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); - if (compiler->shader_phase_count < 1) - return NULL; + compiler->phase = instruction->handler_idx; + spirv_compiler_emit_shader_phase_name(compiler, function_id, NULL); - phase = &compiler->shader_phases[0]; - if (is_control_point_phase(phase)) - return phase; + phase = (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) + ? 
&compiler->control_point_phase : &compiler->patch_constant_phase; + phase->function_id = function_id; + phase->function_location = vkd3d_spirv_stream_current_location(&builder->function_stream); - return NULL; + if (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) + compiler->emit_default_control_point_phase = instruction->flags; } static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler) { - const struct vkd3d_shader_signature *output_signature = compiler->output_signature; - const struct vkd3d_shader_signature *input_signature = compiler->input_signature; + const struct shader_signature *output_signature = &compiler->output_signature; + const struct shader_signature *input_signature = &compiler->input_signature; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; enum vkd3d_shader_component_type component_type; - uint32_t input_id, output_id, dst_id, src_id; struct vkd3d_shader_src_param invocation; struct vkd3d_shader_register input_reg; uint32_t type_id, output_ptr_type_id; + uint32_t input_id, output_id, dst_id; unsigned int component_count; + unsigned int array_sizes[2]; uint32_t invocation_id; unsigned int i; @@ -6657,6 +6312,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile invocation.reg.idx[0].offset = ~0u; invocation.reg.idx[1].offset = ~0u; invocation.reg.idx[2].offset = ~0u; + invocation.reg.idx_count = 0; invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE; memset(&input_reg, 0, sizeof(input_reg)); @@ -6664,37 +6320,42 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile input_reg.data_type = VKD3D_DATA_FLOAT; input_reg.idx[0].rel_addr = &invocation; input_reg.idx[2].offset = ~0u; + input_reg.idx_count = 2; input_id = spirv_compiler_get_register_id(compiler, &input_reg); assert(input_signature->element_count == output_signature->element_count); for (i = 0; i < output_signature->element_count; ++i) { - const struct 
vkd3d_shader_signature_element *output = &output_signature->elements[i]; - const struct vkd3d_shader_signature_element *input = &input_signature->elements[i]; + const struct signature_element *output = &output_signature->elements[i]; + const struct signature_element *input = &input_signature->elements[i]; assert(input->mask == output->mask); assert(input->component_type == output->component_type); - input_reg.idx[1].offset = input->register_index; + input_reg.idx[1].offset = i; input_id = spirv_compiler_get_register_id(compiler, &input_reg); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); - src_id = vkd3d_spirv_build_op_load(builder, type_id, input_id, SpvMemoryAccessMaskNone); component_type = output->component_type; component_count = vkd3d_write_mask_component_count(output->mask); - output_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - SpvStorageClassOutput, component_type, component_count, compiler->output_control_point_count); + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + if ((array_sizes[0] = (input->register_count > 1) ? 
input->register_count : 0)) + type_id = vkd3d_spirv_get_op_type_array(builder, type_id, spirv_compiler_get_constant_uint(compiler, + array_sizes[0])); + + array_sizes[1] = compiler->output_control_point_count; + output_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, SpvStorageClassOutput, + component_type, component_count, array_sizes, 2); vkd3d_spirv_add_iface_variable(builder, output_id); vkd3d_spirv_build_op_decorate1(builder, output_id, SpvDecorationLocation, output->register_index); vkd3d_spirv_build_op_name(builder, output_id, "vocp%u", output->register_index); - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_id, invocation_id); - spirv_compiler_emit_store(compiler, dst_id, output->mask, - component_type, SpvStorageClassOutput, VKD3DSP_WRITEMASK_ALL, src_id); + vkd3d_spirv_build_op_copy_memory(builder, dst_id, input_id, SpvMemoryAccessMaskNone); } + + vkd3d_spirv_build_op_return(builder); } static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, @@ -6723,95 +6384,6 @@ static void spirv_compiler_emit_hull_shader_barrier(struct spirv_compiler *compi SpvScopeWorkgroup, SpvScopeInvocation, SpvMemorySemanticsMaskNone); } -static void spirv_compiler_emit_hull_shader_input_initialisation(struct spirv_compiler *compiler) -{ - uint32_t type_id, length_id, register_index_id, src_array_id, dst_array_id, vicp_id, tmp_id; - const struct vkd3d_shader_signature *signature = compiler->input_signature; - uint32_t src_type_id, dst_type_id, src_id, dst_id, point_index_id; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_signature_element *element; - enum vkd3d_shader_input_sysval_semantic sysval; - const struct vkd3d_spirv_builtin *builtin; - struct vkd3d_symbol *symbol, symbol_key; - 
unsigned int register_count, i, j; - struct vkd3d_shader_register r; - struct rb_entry *entry; - uint32_t indices[2]; - - for (i = 0, register_count = 0; i < signature->element_count; ++i) - { - register_count = max(register_count, signature->elements[i].register_index + 1); - } - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); - length_id = spirv_compiler_get_constant_uint(compiler, compiler->input_control_point_count); - type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); - type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); - - memset(&r, 0, sizeof(r)); - r.type = VKD3DSPR_INPUT; - r.idx[0].offset = 0; - r.idx[1].offset = ~0u; - vkd3d_symbol_make_register(&symbol_key, &r); - - for (i = 0; i < signature->element_count; ++i) - { - element = &signature->elements[i]; - - symbol_key.key.reg.idx = element->register_index; - entry = rb_get(&compiler->symbol_table, &symbol_key); - symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - - vicp_id = symbol->id; - register_index_id = spirv_compiler_get_constant_uint(compiler, element->register_index); - dst_array_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, type_id, vicp_id, register_index_id); - - if (element->sysval_semantic) - { - sysval = vkd3d_siv_from_sysval(element->sysval_semantic); - builtin = get_spirv_builtin_for_sysval(compiler, sysval); - src_array_id = spirv_compiler_emit_builtin_variable(compiler, builtin, - SpvStorageClassInput, compiler->input_control_point_count); - - if (builtin->component_count == 4) - { - vkd3d_spirv_build_op_copy_memory(builder, dst_array_id, src_array_id, SpvMemoryAccessMaskNone); - } - else - { - tmp_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, builtin->component_count); - src_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassInput, tmp_id); - dst_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, tmp_id); - - for (j = 
0; j < compiler->input_control_point_count; ++j) - { - point_index_id = spirv_compiler_get_constant_uint(compiler, j); - src_id = vkd3d_spirv_build_op_in_bounds_access_chain1(builder, - src_type_id, src_array_id, point_index_id); - - indices[0] = point_index_id; - indices[1] = spirv_compiler_get_constant_uint(compiler, 0); - dst_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, - dst_type_id, dst_array_id, indices, 2); - - vkd3d_spirv_build_op_copy_memory(builder, dst_id, src_id, SpvMemoryAccessMaskNone); - } - } - } - else - { - src_array_id = spirv_compiler_emit_array_variable(compiler, &builder->global_stream, - SpvStorageClassInput, VKD3D_SHADER_COMPONENT_FLOAT, 4, compiler->input_control_point_count); - vkd3d_spirv_add_iface_variable(builder, src_array_id); - vkd3d_spirv_build_op_decorate1(builder, src_array_id, SpvDecorationLocation, element->register_index); - vkd3d_spirv_build_op_name(builder, src_array_id, "v%u", element->register_index); - - vkd3d_spirv_build_op_copy_memory(builder, dst_array_id, src_array_id, SpvMemoryAccessMaskNone); - } - symbol->info.reg.dcl_mask |= element->mask; - } -} - static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -6854,46 +6426,21 @@ static void spirv_compiler_emit_shader_epilogue_invocation(struct spirv_compiler static void spirv_compiler_emit_hull_shader_main(struct spirv_compiler *compiler) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_phase *control_point_phase, *phase; - uint32_t phase_instance_id; - unsigned int i, j; uint32_t void_id; vkd3d_spirv_builder_begin_main_function(builder); - spirv_compiler_emit_hull_shader_input_initialisation(compiler); - void_id = vkd3d_spirv_get_op_type_void(builder); - if ((control_point_phase = spirv_compiler_get_control_point_phase(compiler))) - vkd3d_spirv_build_op_function_call(builder, void_id, 
control_point_phase->function_id, NULL, 0); - else - spirv_compiler_emit_default_control_point_phase(compiler); + vkd3d_spirv_build_op_function_call(builder, void_id, compiler->control_point_phase.function_id, NULL, 0); if (compiler->use_vocp) spirv_compiler_emit_hull_shader_barrier(compiler); - for (i = 0; i < compiler->shader_phase_count; ++i) - { - phase = &compiler->shader_phases[i]; - if (is_control_point_phase(phase)) - continue; - - if (phase->instance_count) - { - for (j = 0; j < phase->instance_count; ++j) - { - phase_instance_id = spirv_compiler_get_constant_uint(compiler, j); - vkd3d_spirv_build_op_function_call(builder, - void_id, phase->function_id, &phase_instance_id, 1); - } - } - else - { - vkd3d_spirv_build_op_function_call(builder, void_id, phase->function_id, NULL, 0); - } - } - + /* TODO: only call the patch constant function for invocation 0. The simplest way + * is to avoid use of private variables there, otherwise we would need a separate + * patch constant epilogue also only called from invocation 0. 
*/ + vkd3d_spirv_build_op_function_call(builder, void_id, compiler->patch_constant_phase.function_id, NULL, 0); spirv_compiler_emit_shader_epilogue_invocation(compiler); vkd3d_spirv_build_op_return(builder); vkd3d_spirv_build_op_function_end(builder); @@ -7575,10 +7122,10 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co static void spirv_compiler_emit_return(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { - const struct vkd3d_shader_phase *phase = spirv_compiler_get_current_shader_phase(compiler); struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (!phase || is_control_point_phase(phase))) + if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (is_in_default_phase(compiler) + || is_in_control_point_phase(compiler))) spirv_compiler_emit_shader_epilogue_invocation(compiler); vkd3d_spirv_build_op_return(builder); @@ -7972,12 +7519,15 @@ static int spirv_compiler_emit_control_flow_instruction(struct spirv_compiler *c if (cf_info) cf_info->inside_block = false; + else + compiler->main_block_open = false; break; case VKD3DSIH_RETP: spirv_compiler_emit_retc(compiler, instruction); break; + case VKD3DSIH_DISCARD: case VKD3DSIH_TEXKILL: spirv_compiler_emit_kill(compiler, instruction); break; @@ -8256,7 +7806,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, image_operands[image_operand_count++] = spirv_compiler_emit_texel_offset(compiler, instruction, image.resource_type_info); } - if (multisample) + if (multisample && image.resource_type_info->ms) { operands_mask |= SpvImageOperandsSampleMask; image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, @@ -9521,58 +9071,6 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, vkd3d_spirv_build_op_end_primitive(builder); } -static void spirv_compiler_emit_hull_shader_inputs(struct spirv_compiler *compiler) 
-{ - const struct vkd3d_shader_signature *signature = compiler->input_signature; - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id, length_id, vicp_id, vicp_type_id; - unsigned int register_count, register_idx, i; - struct vkd3d_shader_register r; - struct vkd3d_symbol symbol; - struct rb_entry *entry; - - for (i = 0, register_count = 0; i < signature->element_count; ++i) - { - register_count = max(register_count, signature->elements[i].register_index + 1); - } - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 4); - length_id = spirv_compiler_get_constant_uint(compiler, compiler->input_control_point_count); - type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); - length_id = spirv_compiler_get_constant_uint(compiler, register_count); - type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); - vicp_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPrivate, type_id); - - vicp_id = vkd3d_spirv_build_op_variable(builder, - &builder->global_stream, vicp_type_id, SpvStorageClassPrivate, 0); - vkd3d_spirv_build_op_name(builder, vicp_id, "vicp"); - - memset(&r, 0, sizeof(r)); - r.type = VKD3DSPR_INPUT; - r.idx[0].offset = 0; - r.idx[1].offset = ~0u; - vkd3d_symbol_make_register(&symbol, &r); - - for (i = 0; i < signature->element_count; ++i) - { - register_idx = signature->elements[i].register_index; - - symbol.key.reg.idx = register_idx; - if ((entry = rb_get(&compiler->symbol_table, &symbol))) - { - struct vkd3d_symbol *s = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - s->info.reg.dcl_mask |= signature->elements[i].mask; - continue; - } - - vkd3d_symbol_set_register_info(&symbol, vicp_id, SpvStorageClassPrivate, - VKD3D_SHADER_COMPONENT_FLOAT, VKD3DSP_WRITEMASK_ALL); - symbol.info.reg.dcl_mask = signature->elements[i].mask; - symbol.info.reg.is_aggregate = true; - spirv_compiler_put_symbol(compiler, &symbol); - } -} - /* This function is called after 
declarations are processed. */ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) { @@ -9581,8 +9079,6 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) if (compiler->xfb_info && compiler->xfb_info->element_count && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) spirv_compiler_emit_point_size(compiler); - if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL) - spirv_compiler_emit_hull_shader_inputs(compiler); } static bool is_dcl_instruction(enum vkd3d_shader_opcode handler_idx) @@ -9660,9 +9156,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_OUTPUT_SIV: spirv_compiler_emit_dcl_output_siv(compiler, instruction); break; - case VKD3DSIH_DCL_INDEX_RANGE: - spirv_compiler_emit_dcl_index_range(compiler, instruction); - break; case VKD3DSIH_DCL_STREAM: spirv_compiler_emit_dcl_stream(compiler, instruction); break; @@ -9699,10 +9192,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_THREAD_GROUP: spirv_compiler_emit_dcl_thread_group(compiler, instruction); break; - case VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - ret = spirv_compiler_emit_shader_phase_instance_count(compiler, instruction); - break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: @@ -9826,6 +9315,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_CONTINUE: case VKD3DSIH_CONTINUEP: case VKD3DSIH_DEFAULT: + case VKD3DSIH_DISCARD: case VKD3DSIH_ELSE: case VKD3DSIH_ENDIF: case VKD3DSIH_ENDLOOP: @@ -9947,28 +9437,55 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, return ret; } -int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, const struct vkd3d_shader_compile_info *compile_info, struct 
vkd3d_shader_parser *parser, struct vkd3d_shader_code *spirv) { - const struct vkd3d_shader_instruction_array *instructions = &parser->instructions; const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_phase *phase; + struct vkd3d_shader_instruction_array instructions; enum vkd3d_result result = VKD3D_OK; unsigned int i; compiler->location.column = 0; - for (i = 0; i < instructions->count; ++i) + compiler->location.line = 1; + + instructions = parser->instructions; + memset(&parser->instructions, 0, sizeof(parser->instructions)); + + if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL + && (result = instruction_array_flatten_hull_shader_phases(&instructions)) >= 0) + { + result = instruction_array_normalise_hull_shader_control_point_io(&instructions, + &compiler->input_signature); + } + if (result >= 0) + result = instruction_array_normalise_io_registers(&instructions, parser->shader_version.type, + &compiler->input_signature, &compiler->output_signature, &compiler->patch_constant_signature); + + if (result >= 0 && TRACE_ON()) + vkd3d_shader_trace(&instructions, &parser->shader_version); + + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) + spirv_compiler_emit_shader_signature_outputs(compiler); + + for (i = 0; i < instructions.count && result >= 0; ++i) { compiler->location.line = i + 1; - if ((result = spirv_compiler_handle_instruction(compiler, &instructions->elements[i])) < 0) - return result; + result = spirv_compiler_handle_instruction(compiler, &instructions.elements[i]); } - if ((phase = spirv_compiler_get_current_shader_phase(compiler))) - spirv_compiler_leave_shader_phase(compiler, phase); + shader_instruction_array_destroy(&instructions); + + if (result < 0) + return result; + + if (compiler->main_block_open) + vkd3d_spirv_build_op_return(builder); + + if 
(!is_in_default_phase(compiler)) + spirv_compiler_leave_shader_phase(compiler); else vkd3d_spirv_build_op_function_end(builder); @@ -10023,23 +9540,23 @@ int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, return VKD3D_OK; } -void spirv_compiler_destroy(struct spirv_compiler *compiler) +int spirv_compile(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { - vkd3d_free(compiler->control_flow_info); - - vkd3d_free(compiler->output_info); - - vkd3d_free(compiler->push_constants); - vkd3d_free(compiler->descriptor_offset_ids); - - vkd3d_spirv_builder_free(&compiler->spirv_builder); - - rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL); + struct spirv_compiler *spirv_compiler; + int ret; - vkd3d_free(compiler->shader_phases); - vkd3d_free(compiler->spec_constants); + if (!(spirv_compiler = spirv_compiler_create(&parser->shader_version, &parser->shader_desc, + compile_info, scan_descriptor_info, message_context, &parser->location))) + { + ERR("Failed to create SPIR-V compiler.\n"); + return VKD3D_ERROR; + } - vkd3d_string_buffer_cache_cleanup(&compiler->string_buffers); + ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); - vkd3d_free(compiler); + spirv_compiler_destroy(spirv_compiler); + return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c new file mode 100644 index 00000000000..d066b13ee4e --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -0,0 +1,5234 @@ +/* + * TPF (Direct3D shader models 4 and 5 bytecode) support + * + * Copyright 2008-2009 Henri Verbeet for CodeWeavers + * Copyright 2010 Rico Schüller + * Copyright 2017 Józef Kucia for CodeWeavers + * Copyright 2019-2020 Zebediah Figura for CodeWeavers + * + * This library is free software; you can redistribute 
it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "hlsl.h" + +#define SM4_MAX_SRC_COUNT 6 +#define SM4_MAX_DST_COUNT 2 + +STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); + +#define VKD3D_SM4_PS 0x0000u +#define VKD3D_SM4_VS 0x0001u +#define VKD3D_SM4_GS 0x0002u +#define VKD3D_SM5_HS 0x0003u +#define VKD3D_SM5_DS 0x0004u +#define VKD3D_SM5_CS 0x0005u +#define VKD3D_SM4_LIB 0xfff0u + +#define VKD3D_SM4_INSTRUCTION_MODIFIER (0x1u << 31) + +#define VKD3D_SM4_MODIFIER_MASK 0x3fu + +#define VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT 6 +#define VKD3D_SM5_MODIFIER_DATA_TYPE_MASK (0xffffu << VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT) + +#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT 6 +#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT 11 +#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK (0xfffu << VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT) + +#define VKD3D_SM4_AOFFIMMI_U_SHIFT 9 +#define VKD3D_SM4_AOFFIMMI_U_MASK (0xfu << VKD3D_SM4_AOFFIMMI_U_SHIFT) +#define VKD3D_SM4_AOFFIMMI_V_SHIFT 13 +#define VKD3D_SM4_AOFFIMMI_V_MASK (0xfu << VKD3D_SM4_AOFFIMMI_V_SHIFT) +#define VKD3D_SM4_AOFFIMMI_W_SHIFT 17 +#define VKD3D_SM4_AOFFIMMI_W_MASK (0xfu << VKD3D_SM4_AOFFIMMI_W_SHIFT) + +#define VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT 24 +#define 
VKD3D_SM4_INSTRUCTION_LENGTH_MASK (0x1fu << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT) + +#define VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT 11 +#define VKD3D_SM4_INSTRUCTION_FLAGS_MASK (0x7u << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT) + +#define VKD3D_SM4_RESOURCE_TYPE_SHIFT 11 +#define VKD3D_SM4_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM4_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT 16 +#define VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK (0xfu << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT) + +#define VKD3D_SM4_PRIMITIVE_TYPE_SHIFT 11 +#define VKD3D_SM4_PRIMITIVE_TYPE_MASK (0x3fu << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT) + +#define VKD3D_SM4_INDEX_TYPE_SHIFT 11 +#define VKD3D_SM4_INDEX_TYPE_MASK (0x1u << VKD3D_SM4_INDEX_TYPE_SHIFT) + +#define VKD3D_SM4_SAMPLER_MODE_SHIFT 11 +#define VKD3D_SM4_SAMPLER_MODE_MASK (0xfu << VKD3D_SM4_SAMPLER_MODE_SHIFT) + +#define VKD3D_SM4_SHADER_DATA_TYPE_SHIFT 11 +#define VKD3D_SM4_SHADER_DATA_TYPE_MASK (0xfu << VKD3D_SM4_SHADER_DATA_TYPE_SHIFT) + +#define VKD3D_SM4_INTERPOLATION_MODE_SHIFT 11 +#define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << VKD3D_SM4_INTERPOLATION_MODE_SHIFT) + +#define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 +#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) + +#define VKD3D_SM5_PRECISE_SHIFT 19 +#define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) + +#define VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT 11 +#define VKD3D_SM5_CONTROL_POINT_COUNT_MASK (0xffu << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT) + +#define VKD3D_SM5_FP_ARRAY_SIZE_SHIFT 16 +#define VKD3D_SM5_FP_TABLE_COUNT_MASK 0xffffu + +#define VKD3D_SM5_UAV_FLAGS_SHIFT 15 +#define VKD3D_SM5_UAV_FLAGS_MASK (0x1ffu << VKD3D_SM5_UAV_FLAGS_SHIFT) + +#define VKD3D_SM5_SYNC_FLAGS_SHIFT 11 +#define VKD3D_SM5_SYNC_FLAGS_MASK (0xffu << VKD3D_SM5_SYNC_FLAGS_SHIFT) + +#define VKD3D_SM5_TESSELLATOR_SHIFT 11 +#define VKD3D_SM5_TESSELLATOR_MASK (0xfu << VKD3D_SM5_TESSELLATOR_SHIFT) + +#define VKD3D_SM4_OPCODE_MASK 0xff + +#define VKD3D_SM4_EXTENDED_OPERAND (0x1u << 31) + 
+#define VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK 0x3fu + +#define VKD3D_SM4_REGISTER_MODIFIER_SHIFT 6 +#define VKD3D_SM4_REGISTER_MODIFIER_MASK (0xffu << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) + +#define VKD3D_SM4_REGISTER_PRECISION_SHIFT 14 +#define VKD3D_SM4_REGISTER_PRECISION_MASK (0x7u << VKD3D_SM4_REGISTER_PRECISION_SHIFT) + +#define VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT 17 +#define VKD3D_SM4_REGISTER_NON_UNIFORM_MASK (0x1u << VKD3D_SM4_REGISTER_NON_UNIFORM_SHIFT) + +#define VKD3D_SM4_ADDRESSING_SHIFT2 28 +#define VKD3D_SM4_ADDRESSING_MASK2 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT2) + +#define VKD3D_SM4_ADDRESSING_SHIFT1 25 +#define VKD3D_SM4_ADDRESSING_MASK1 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT1) + +#define VKD3D_SM4_ADDRESSING_SHIFT0 22 +#define VKD3D_SM4_ADDRESSING_MASK0 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT0) + +#define VKD3D_SM4_REGISTER_ORDER_SHIFT 20 +#define VKD3D_SM4_REGISTER_ORDER_MASK (0x3u << VKD3D_SM4_REGISTER_ORDER_SHIFT) + +#define VKD3D_SM4_REGISTER_TYPE_SHIFT 12 +#define VKD3D_SM4_REGISTER_TYPE_MASK (0xffu << VKD3D_SM4_REGISTER_TYPE_SHIFT) + +#define VKD3D_SM4_SWIZZLE_TYPE_SHIFT 2 +#define VKD3D_SM4_SWIZZLE_TYPE_MASK (0x3u << VKD3D_SM4_SWIZZLE_TYPE_SHIFT) + +#define VKD3D_SM4_DIMENSION_SHIFT 0 +#define VKD3D_SM4_DIMENSION_MASK (0x3u << VKD3D_SM4_DIMENSION_SHIFT) + +#define VKD3D_SM4_WRITEMASK_SHIFT 4 +#define VKD3D_SM4_WRITEMASK_MASK (0xfu << VKD3D_SM4_WRITEMASK_SHIFT) + +#define VKD3D_SM4_SWIZZLE_SHIFT 4 +#define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT) + +#define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) +#define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf) + +#define VKD3D_SM4_ADDRESSING_RELATIVE 0x2 +#define VKD3D_SM4_ADDRESSING_OFFSET 0x1 + +#define VKD3D_SM4_INSTRUCTION_FLAG_SATURATE 0x4 + +#define VKD3D_SM4_CONDITIONAL_NZ (0x1u << 18) + +#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) + +/* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. 
*/ +#define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 + +enum vkd3d_sm4_opcode +{ + VKD3D_SM4_OP_ADD = 0x00, + VKD3D_SM4_OP_AND = 0x01, + VKD3D_SM4_OP_BREAK = 0x02, + VKD3D_SM4_OP_BREAKC = 0x03, + VKD3D_SM4_OP_CASE = 0x06, + VKD3D_SM4_OP_CONTINUE = 0x07, + VKD3D_SM4_OP_CONTINUEC = 0x08, + VKD3D_SM4_OP_CUT = 0x09, + VKD3D_SM4_OP_DEFAULT = 0x0a, + VKD3D_SM4_OP_DERIV_RTX = 0x0b, + VKD3D_SM4_OP_DERIV_RTY = 0x0c, + VKD3D_SM4_OP_DISCARD = 0x0d, + VKD3D_SM4_OP_DIV = 0x0e, + VKD3D_SM4_OP_DP2 = 0x0f, + VKD3D_SM4_OP_DP3 = 0x10, + VKD3D_SM4_OP_DP4 = 0x11, + VKD3D_SM4_OP_ELSE = 0x12, + VKD3D_SM4_OP_EMIT = 0x13, + VKD3D_SM4_OP_ENDIF = 0x15, + VKD3D_SM4_OP_ENDLOOP = 0x16, + VKD3D_SM4_OP_ENDSWITCH = 0x17, + VKD3D_SM4_OP_EQ = 0x18, + VKD3D_SM4_OP_EXP = 0x19, + VKD3D_SM4_OP_FRC = 0x1a, + VKD3D_SM4_OP_FTOI = 0x1b, + VKD3D_SM4_OP_FTOU = 0x1c, + VKD3D_SM4_OP_GE = 0x1d, + VKD3D_SM4_OP_IADD = 0x1e, + VKD3D_SM4_OP_IF = 0x1f, + VKD3D_SM4_OP_IEQ = 0x20, + VKD3D_SM4_OP_IGE = 0x21, + VKD3D_SM4_OP_ILT = 0x22, + VKD3D_SM4_OP_IMAD = 0x23, + VKD3D_SM4_OP_IMAX = 0x24, + VKD3D_SM4_OP_IMIN = 0x25, + VKD3D_SM4_OP_IMUL = 0x26, + VKD3D_SM4_OP_INE = 0x27, + VKD3D_SM4_OP_INEG = 0x28, + VKD3D_SM4_OP_ISHL = 0x29, + VKD3D_SM4_OP_ISHR = 0x2a, + VKD3D_SM4_OP_ITOF = 0x2b, + VKD3D_SM4_OP_LABEL = 0x2c, + VKD3D_SM4_OP_LD = 0x2d, + VKD3D_SM4_OP_LD2DMS = 0x2e, + VKD3D_SM4_OP_LOG = 0x2f, + VKD3D_SM4_OP_LOOP = 0x30, + VKD3D_SM4_OP_LT = 0x31, + VKD3D_SM4_OP_MAD = 0x32, + VKD3D_SM4_OP_MIN = 0x33, + VKD3D_SM4_OP_MAX = 0x34, + VKD3D_SM4_OP_SHADER_DATA = 0x35, + VKD3D_SM4_OP_MOV = 0x36, + VKD3D_SM4_OP_MOVC = 0x37, + VKD3D_SM4_OP_MUL = 0x38, + VKD3D_SM4_OP_NE = 0x39, + VKD3D_SM4_OP_NOP = 0x3a, + VKD3D_SM4_OP_NOT = 0x3b, + VKD3D_SM4_OP_OR = 0x3c, + VKD3D_SM4_OP_RESINFO = 0x3d, + VKD3D_SM4_OP_RET = 0x3e, + VKD3D_SM4_OP_RETC = 0x3f, + VKD3D_SM4_OP_ROUND_NE = 0x40, + VKD3D_SM4_OP_ROUND_NI = 0x41, + VKD3D_SM4_OP_ROUND_PI = 0x42, + VKD3D_SM4_OP_ROUND_Z = 0x43, + VKD3D_SM4_OP_RSQ = 0x44, + VKD3D_SM4_OP_SAMPLE = 0x45, + 
VKD3D_SM4_OP_SAMPLE_C = 0x46, + VKD3D_SM4_OP_SAMPLE_C_LZ = 0x47, + VKD3D_SM4_OP_SAMPLE_LOD = 0x48, + VKD3D_SM4_OP_SAMPLE_GRAD = 0x49, + VKD3D_SM4_OP_SAMPLE_B = 0x4a, + VKD3D_SM4_OP_SQRT = 0x4b, + VKD3D_SM4_OP_SWITCH = 0x4c, + VKD3D_SM4_OP_SINCOS = 0x4d, + VKD3D_SM4_OP_UDIV = 0x4e, + VKD3D_SM4_OP_ULT = 0x4f, + VKD3D_SM4_OP_UGE = 0x50, + VKD3D_SM4_OP_UMUL = 0x51, + VKD3D_SM4_OP_UMAX = 0x53, + VKD3D_SM4_OP_UMIN = 0x54, + VKD3D_SM4_OP_USHR = 0x55, + VKD3D_SM4_OP_UTOF = 0x56, + VKD3D_SM4_OP_XOR = 0x57, + VKD3D_SM4_OP_DCL_RESOURCE = 0x58, + VKD3D_SM4_OP_DCL_CONSTANT_BUFFER = 0x59, + VKD3D_SM4_OP_DCL_SAMPLER = 0x5a, + VKD3D_SM4_OP_DCL_INDEX_RANGE = 0x5b, + VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY = 0x5c, + VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE = 0x5d, + VKD3D_SM4_OP_DCL_VERTICES_OUT = 0x5e, + VKD3D_SM4_OP_DCL_INPUT = 0x5f, + VKD3D_SM4_OP_DCL_INPUT_SGV = 0x60, + VKD3D_SM4_OP_DCL_INPUT_SIV = 0x61, + VKD3D_SM4_OP_DCL_INPUT_PS = 0x62, + VKD3D_SM4_OP_DCL_INPUT_PS_SGV = 0x63, + VKD3D_SM4_OP_DCL_INPUT_PS_SIV = 0x64, + VKD3D_SM4_OP_DCL_OUTPUT = 0x65, + VKD3D_SM4_OP_DCL_OUTPUT_SIV = 0x67, + VKD3D_SM4_OP_DCL_TEMPS = 0x68, + VKD3D_SM4_OP_DCL_INDEXABLE_TEMP = 0x69, + VKD3D_SM4_OP_DCL_GLOBAL_FLAGS = 0x6a, + VKD3D_SM4_OP_LOD = 0x6c, + VKD3D_SM4_OP_GATHER4 = 0x6d, + VKD3D_SM4_OP_SAMPLE_POS = 0x6e, + VKD3D_SM4_OP_SAMPLE_INFO = 0x6f, + VKD3D_SM5_OP_HS_DECLS = 0x71, + VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE = 0x72, + VKD3D_SM5_OP_HS_FORK_PHASE = 0x73, + VKD3D_SM5_OP_HS_JOIN_PHASE = 0x74, + VKD3D_SM5_OP_EMIT_STREAM = 0x75, + VKD3D_SM5_OP_CUT_STREAM = 0x76, + VKD3D_SM5_OP_FCALL = 0x78, + VKD3D_SM5_OP_BUFINFO = 0x79, + VKD3D_SM5_OP_DERIV_RTX_COARSE = 0x7a, + VKD3D_SM5_OP_DERIV_RTX_FINE = 0x7b, + VKD3D_SM5_OP_DERIV_RTY_COARSE = 0x7c, + VKD3D_SM5_OP_DERIV_RTY_FINE = 0x7d, + VKD3D_SM5_OP_GATHER4_C = 0x7e, + VKD3D_SM5_OP_GATHER4_PO = 0x7f, + VKD3D_SM5_OP_GATHER4_PO_C = 0x80, + VKD3D_SM5_OP_RCP = 0x81, + VKD3D_SM5_OP_F32TOF16 = 0x82, + VKD3D_SM5_OP_F16TOF32 = 0x83, + VKD3D_SM5_OP_COUNTBITS = 0x86, + 
VKD3D_SM5_OP_FIRSTBIT_HI = 0x87, + VKD3D_SM5_OP_FIRSTBIT_LO = 0x88, + VKD3D_SM5_OP_FIRSTBIT_SHI = 0x89, + VKD3D_SM5_OP_UBFE = 0x8a, + VKD3D_SM5_OP_IBFE = 0x8b, + VKD3D_SM5_OP_BFI = 0x8c, + VKD3D_SM5_OP_BFREV = 0x8d, + VKD3D_SM5_OP_SWAPC = 0x8e, + VKD3D_SM5_OP_DCL_STREAM = 0x8f, + VKD3D_SM5_OP_DCL_FUNCTION_BODY = 0x90, + VKD3D_SM5_OP_DCL_FUNCTION_TABLE = 0x91, + VKD3D_SM5_OP_DCL_INTERFACE = 0x92, + VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT = 0x93, + VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT = 0x94, + VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN = 0x95, + VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING = 0x96, + VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE = 0x97, + VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR = 0x98, + VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT = 0x99, + VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT = 0x9a, + VKD3D_SM5_OP_DCL_THREAD_GROUP = 0x9b, + VKD3D_SM5_OP_DCL_UAV_TYPED = 0x9c, + VKD3D_SM5_OP_DCL_UAV_RAW = 0x9d, + VKD3D_SM5_OP_DCL_UAV_STRUCTURED = 0x9e, + VKD3D_SM5_OP_DCL_TGSM_RAW = 0x9f, + VKD3D_SM5_OP_DCL_TGSM_STRUCTURED = 0xa0, + VKD3D_SM5_OP_DCL_RESOURCE_RAW = 0xa1, + VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED = 0xa2, + VKD3D_SM5_OP_LD_UAV_TYPED = 0xa3, + VKD3D_SM5_OP_STORE_UAV_TYPED = 0xa4, + VKD3D_SM5_OP_LD_RAW = 0xa5, + VKD3D_SM5_OP_STORE_RAW = 0xa6, + VKD3D_SM5_OP_LD_STRUCTURED = 0xa7, + VKD3D_SM5_OP_STORE_STRUCTURED = 0xa8, + VKD3D_SM5_OP_ATOMIC_AND = 0xa9, + VKD3D_SM5_OP_ATOMIC_OR = 0xaa, + VKD3D_SM5_OP_ATOMIC_XOR = 0xab, + VKD3D_SM5_OP_ATOMIC_CMP_STORE = 0xac, + VKD3D_SM5_OP_ATOMIC_IADD = 0xad, + VKD3D_SM5_OP_ATOMIC_IMAX = 0xae, + VKD3D_SM5_OP_ATOMIC_IMIN = 0xaf, + VKD3D_SM5_OP_ATOMIC_UMAX = 0xb0, + VKD3D_SM5_OP_ATOMIC_UMIN = 0xb1, + VKD3D_SM5_OP_IMM_ATOMIC_ALLOC = 0xb2, + VKD3D_SM5_OP_IMM_ATOMIC_CONSUME = 0xb3, + VKD3D_SM5_OP_IMM_ATOMIC_IADD = 0xb4, + VKD3D_SM5_OP_IMM_ATOMIC_AND = 0xb5, + VKD3D_SM5_OP_IMM_ATOMIC_OR = 0xb6, + VKD3D_SM5_OP_IMM_ATOMIC_XOR = 0xb7, + VKD3D_SM5_OP_IMM_ATOMIC_EXCH = 0xb8, + VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH = 0xb9, + 
VKD3D_SM5_OP_IMM_ATOMIC_IMAX = 0xba, + VKD3D_SM5_OP_IMM_ATOMIC_IMIN = 0xbb, + VKD3D_SM5_OP_IMM_ATOMIC_UMAX = 0xbc, + VKD3D_SM5_OP_IMM_ATOMIC_UMIN = 0xbd, + VKD3D_SM5_OP_SYNC = 0xbe, + VKD3D_SM5_OP_DADD = 0xbf, + VKD3D_SM5_OP_DMAX = 0xc0, + VKD3D_SM5_OP_DMIN = 0xc1, + VKD3D_SM5_OP_DMUL = 0xc2, + VKD3D_SM5_OP_DEQ = 0xc3, + VKD3D_SM5_OP_DGE = 0xc4, + VKD3D_SM5_OP_DLT = 0xc5, + VKD3D_SM5_OP_DNE = 0xc6, + VKD3D_SM5_OP_DMOV = 0xc7, + VKD3D_SM5_OP_DMOVC = 0xc8, + VKD3D_SM5_OP_DTOF = 0xc9, + VKD3D_SM5_OP_FTOD = 0xca, + VKD3D_SM5_OP_EVAL_SAMPLE_INDEX = 0xcc, + VKD3D_SM5_OP_EVAL_CENTROID = 0xcd, + VKD3D_SM5_OP_DCL_GS_INSTANCES = 0xce, + VKD3D_SM5_OP_DDIV = 0xd2, + VKD3D_SM5_OP_DFMA = 0xd3, + VKD3D_SM5_OP_DRCP = 0xd4, + VKD3D_SM5_OP_MSAD = 0xd5, + VKD3D_SM5_OP_DTOI = 0xd6, + VKD3D_SM5_OP_DTOU = 0xd7, + VKD3D_SM5_OP_ITOD = 0xd8, + VKD3D_SM5_OP_UTOD = 0xd9, + VKD3D_SM5_OP_GATHER4_S = 0xdb, + VKD3D_SM5_OP_GATHER4_C_S = 0xdc, + VKD3D_SM5_OP_GATHER4_PO_S = 0xdd, + VKD3D_SM5_OP_GATHER4_PO_C_S = 0xde, + VKD3D_SM5_OP_LD_S = 0xdf, + VKD3D_SM5_OP_LD2DMS_S = 0xe0, + VKD3D_SM5_OP_LD_UAV_TYPED_S = 0xe1, + VKD3D_SM5_OP_LD_RAW_S = 0xe2, + VKD3D_SM5_OP_LD_STRUCTURED_S = 0xe3, + VKD3D_SM5_OP_SAMPLE_LOD_S = 0xe4, + VKD3D_SM5_OP_SAMPLE_C_LZ_S = 0xe5, + VKD3D_SM5_OP_SAMPLE_CL_S = 0xe6, + VKD3D_SM5_OP_SAMPLE_B_CL_S = 0xe7, + VKD3D_SM5_OP_SAMPLE_GRAD_CL_S = 0xe8, + VKD3D_SM5_OP_SAMPLE_C_CL_S = 0xe9, + VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED = 0xea, +}; + +enum vkd3d_sm4_instruction_modifier +{ + VKD3D_SM4_MODIFIER_AOFFIMMI = 0x1, + VKD3D_SM5_MODIFIER_RESOURCE_TYPE = 0x2, + VKD3D_SM5_MODIFIER_DATA_TYPE = 0x3, +}; + +enum vkd3d_sm4_register_type +{ + VKD3D_SM4_RT_TEMP = 0x00, + VKD3D_SM4_RT_INPUT = 0x01, + VKD3D_SM4_RT_OUTPUT = 0x02, + VKD3D_SM4_RT_INDEXABLE_TEMP = 0x03, + VKD3D_SM4_RT_IMMCONST = 0x04, + VKD3D_SM4_RT_IMMCONST64 = 0x05, + VKD3D_SM4_RT_SAMPLER = 0x06, + VKD3D_SM4_RT_RESOURCE = 0x07, + VKD3D_SM4_RT_CONSTBUFFER = 0x08, + VKD3D_SM4_RT_IMMCONSTBUFFER = 0x09, + VKD3D_SM4_RT_PRIMID = 0x0b, + 
VKD3D_SM4_RT_DEPTHOUT = 0x0c, + VKD3D_SM4_RT_NULL = 0x0d, + VKD3D_SM4_RT_RASTERIZER = 0x0e, + VKD3D_SM4_RT_OMASK = 0x0f, + VKD3D_SM5_RT_STREAM = 0x10, + VKD3D_SM5_RT_FUNCTION_BODY = 0x11, + VKD3D_SM5_RT_FUNCTION_POINTER = 0x13, + VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID = 0x16, + VKD3D_SM5_RT_FORK_INSTANCE_ID = 0x17, + VKD3D_SM5_RT_JOIN_INSTANCE_ID = 0x18, + VKD3D_SM5_RT_INPUT_CONTROL_POINT = 0x19, + VKD3D_SM5_RT_OUTPUT_CONTROL_POINT = 0x1a, + VKD3D_SM5_RT_PATCH_CONSTANT_DATA = 0x1b, + VKD3D_SM5_RT_DOMAIN_LOCATION = 0x1c, + VKD3D_SM5_RT_UAV = 0x1e, + VKD3D_SM5_RT_SHARED_MEMORY = 0x1f, + VKD3D_SM5_RT_THREAD_ID = 0x20, + VKD3D_SM5_RT_THREAD_GROUP_ID = 0x21, + VKD3D_SM5_RT_LOCAL_THREAD_ID = 0x22, + VKD3D_SM5_RT_COVERAGE = 0x23, + VKD3D_SM5_RT_LOCAL_THREAD_INDEX = 0x24, + VKD3D_SM5_RT_GS_INSTANCE_ID = 0x25, + VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, + VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, + VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, +}; + +enum vkd3d_sm4_extended_operand_type +{ + VKD3D_SM4_EXTENDED_OPERAND_NONE = 0x0, + VKD3D_SM4_EXTENDED_OPERAND_MODIFIER = 0x1, +}; + +enum vkd3d_sm4_register_modifier +{ + VKD3D_SM4_REGISTER_MODIFIER_NONE = 0x00, + VKD3D_SM4_REGISTER_MODIFIER_NEGATE = 0x01, + VKD3D_SM4_REGISTER_MODIFIER_ABS = 0x02, + VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE = 0x03, +}; + +enum vkd3d_sm4_register_precision +{ + VKD3D_SM4_REGISTER_PRECISION_DEFAULT = 0x0, + VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 = 0x1, + VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 = 0x2, + VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 = 0x4, + VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 = 0x5, +}; + +enum vkd3d_sm4_output_primitive_type +{ + VKD3D_SM4_OUTPUT_PT_POINTLIST = 0x1, + VKD3D_SM4_OUTPUT_PT_LINESTRIP = 0x3, + VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP = 0x5, +}; + +enum vkd3d_sm4_input_primitive_type +{ + VKD3D_SM4_INPUT_PT_POINT = 0x01, + VKD3D_SM4_INPUT_PT_LINE = 0x02, + VKD3D_SM4_INPUT_PT_TRIANGLE = 0x03, + VKD3D_SM4_INPUT_PT_LINEADJ = 0x06, + VKD3D_SM4_INPUT_PT_TRIANGLEADJ = 0x07, + 
VKD3D_SM5_INPUT_PT_PATCH1 = 0x08, + VKD3D_SM5_INPUT_PT_PATCH2 = 0x09, + VKD3D_SM5_INPUT_PT_PATCH3 = 0x0a, + VKD3D_SM5_INPUT_PT_PATCH4 = 0x0b, + VKD3D_SM5_INPUT_PT_PATCH5 = 0x0c, + VKD3D_SM5_INPUT_PT_PATCH6 = 0x0d, + VKD3D_SM5_INPUT_PT_PATCH7 = 0x0e, + VKD3D_SM5_INPUT_PT_PATCH8 = 0x0f, + VKD3D_SM5_INPUT_PT_PATCH9 = 0x10, + VKD3D_SM5_INPUT_PT_PATCH10 = 0x11, + VKD3D_SM5_INPUT_PT_PATCH11 = 0x12, + VKD3D_SM5_INPUT_PT_PATCH12 = 0x13, + VKD3D_SM5_INPUT_PT_PATCH13 = 0x14, + VKD3D_SM5_INPUT_PT_PATCH14 = 0x15, + VKD3D_SM5_INPUT_PT_PATCH15 = 0x16, + VKD3D_SM5_INPUT_PT_PATCH16 = 0x17, + VKD3D_SM5_INPUT_PT_PATCH17 = 0x18, + VKD3D_SM5_INPUT_PT_PATCH18 = 0x19, + VKD3D_SM5_INPUT_PT_PATCH19 = 0x1a, + VKD3D_SM5_INPUT_PT_PATCH20 = 0x1b, + VKD3D_SM5_INPUT_PT_PATCH21 = 0x1c, + VKD3D_SM5_INPUT_PT_PATCH22 = 0x1d, + VKD3D_SM5_INPUT_PT_PATCH23 = 0x1e, + VKD3D_SM5_INPUT_PT_PATCH24 = 0x1f, + VKD3D_SM5_INPUT_PT_PATCH25 = 0x20, + VKD3D_SM5_INPUT_PT_PATCH26 = 0x21, + VKD3D_SM5_INPUT_PT_PATCH27 = 0x22, + VKD3D_SM5_INPUT_PT_PATCH28 = 0x23, + VKD3D_SM5_INPUT_PT_PATCH29 = 0x24, + VKD3D_SM5_INPUT_PT_PATCH30 = 0x25, + VKD3D_SM5_INPUT_PT_PATCH31 = 0x26, + VKD3D_SM5_INPUT_PT_PATCH32 = 0x27, +}; + +enum vkd3d_sm4_swizzle_type +{ + VKD3D_SM4_SWIZZLE_NONE = 0x0, + VKD3D_SM4_SWIZZLE_VEC4 = 0x1, + VKD3D_SM4_SWIZZLE_SCALAR = 0x2, +}; + +enum vkd3d_sm4_dimension +{ + VKD3D_SM4_DIMENSION_NONE = 0x0, + VKD3D_SM4_DIMENSION_SCALAR = 0x1, + VKD3D_SM4_DIMENSION_VEC4 = 0x2, +}; + +enum vkd3d_sm4_resource_type +{ + VKD3D_SM4_RESOURCE_BUFFER = 0x1, + VKD3D_SM4_RESOURCE_TEXTURE_1D = 0x2, + VKD3D_SM4_RESOURCE_TEXTURE_2D = 0x3, + VKD3D_SM4_RESOURCE_TEXTURE_2DMS = 0x4, + VKD3D_SM4_RESOURCE_TEXTURE_3D = 0x5, + VKD3D_SM4_RESOURCE_TEXTURE_CUBE = 0x6, + VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY = 0x7, + VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY = 0x8, + VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY = 0x9, + VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY = 0xa, + VKD3D_SM4_RESOURCE_RAW_BUFFER = 0xb, + VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER = 0xc, +}; + +enum 
vkd3d_sm4_data_type +{ + VKD3D_SM4_DATA_UNORM = 0x1, + VKD3D_SM4_DATA_SNORM = 0x2, + VKD3D_SM4_DATA_INT = 0x3, + VKD3D_SM4_DATA_UINT = 0x4, + VKD3D_SM4_DATA_FLOAT = 0x5, + VKD3D_SM4_DATA_MIXED = 0x6, + VKD3D_SM4_DATA_DOUBLE = 0x7, + VKD3D_SM4_DATA_CONTINUED = 0x8, + VKD3D_SM4_DATA_UNUSED = 0x9, +}; + +enum vkd3d_sm4_sampler_mode +{ + VKD3D_SM4_SAMPLER_DEFAULT = 0x0, + VKD3D_SM4_SAMPLER_COMPARISON = 0x1, +}; + +enum vkd3d_sm4_shader_data_type +{ + VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER = 0x3, + VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, +}; + +struct sm4_index_range +{ + unsigned int index; + unsigned int count; + unsigned int mask; +}; + +struct sm4_index_range_array +{ + unsigned int count; + struct sm4_index_range ranges[MAX_REG_OUTPUT * 2]; +}; + +struct vkd3d_shader_sm4_parser +{ + const uint32_t *start, *end, *ptr; + + unsigned int output_map[MAX_REG_OUTPUT]; + + enum vkd3d_shader_opcode phase; + bool has_control_point_phase; + unsigned int input_register_masks[MAX_REG_OUTPUT]; + unsigned int output_register_masks[MAX_REG_OUTPUT]; + unsigned int patch_constant_register_masks[MAX_REG_OUTPUT]; + + struct sm4_index_range_array input_index_ranges; + struct sm4_index_range_array output_index_ranges; + struct sm4_index_range_array patch_constant_index_ranges; + + struct vkd3d_shader_parser p; +}; + +struct vkd3d_sm4_opcode_info +{ + enum vkd3d_sm4_opcode opcode; + enum vkd3d_shader_opcode handler_idx; + char dst_info[SM4_MAX_DST_COUNT]; + char src_info[SM4_MAX_SRC_COUNT]; + void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); +}; + +static const enum vkd3d_primitive_type output_primitive_type_table[] = +{ + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_POINTLIST */ VKD3D_PT_POINTLIST, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_OUTPUT_PT_LINESTRIP */ VKD3D_PT_LINESTRIP, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, 
+ /* VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP */ VKD3D_PT_TRIANGLESTRIP, +}; + +static const enum vkd3d_primitive_type input_primitive_type_table[] = +{ + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_INPUT_PT_POINT */ VKD3D_PT_POINTLIST, + /* VKD3D_SM4_INPUT_PT_LINE */ VKD3D_PT_LINELIST, + /* VKD3D_SM4_INPUT_PT_TRIANGLE */ VKD3D_PT_TRIANGLELIST, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* UNKNOWN */ VKD3D_PT_UNDEFINED, + /* VKD3D_SM4_INPUT_PT_LINEADJ */ VKD3D_PT_LINELIST_ADJ, + /* VKD3D_SM4_INPUT_PT_TRIANGLEADJ */ VKD3D_PT_TRIANGLELIST_ADJ, +}; + +static const enum vkd3d_shader_resource_type resource_type_table[] = +{ + /* 0 */ VKD3D_SHADER_RESOURCE_NONE, + /* VKD3D_SM4_RESOURCE_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, + /* VKD3D_SM4_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D, + /* VKD3D_SM4_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DMS */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, + /* VKD3D_SM4_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D, + /* VKD3D_SM4_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, + /* VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, + /* VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, + /* VKD3D_SM4_RESOURCE_RAW_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, + /* VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER */ VKD3D_SHADER_RESOURCE_BUFFER, +}; + +static const enum vkd3d_data_type data_type_table[] = +{ + /* 0 */ VKD3D_DATA_FLOAT, + /* VKD3D_SM4_DATA_UNORM */ VKD3D_DATA_UNORM, + /* VKD3D_SM4_DATA_SNORM */ VKD3D_DATA_SNORM, + /* VKD3D_SM4_DATA_INT */ VKD3D_DATA_INT, + /* VKD3D_SM4_DATA_UINT */ VKD3D_DATA_UINT, + /* VKD3D_SM4_DATA_FLOAT */ VKD3D_DATA_FLOAT, + /* VKD3D_SM4_DATA_MIXED */ VKD3D_DATA_MIXED, + /* VKD3D_SM4_DATA_DOUBLE */ VKD3D_DATA_DOUBLE, + /* 
VKD3D_SM4_DATA_CONTINUED */ VKD3D_DATA_CONTINUED, + /* VKD3D_SM4_DATA_UNUSED */ VKD3D_DATA_UNUSED, +}; +/* Return the SM4 parser that embeds the generic parser "parser" as its member p. */ +static struct vkd3d_shader_sm4_parser *vkd3d_shader_sm4_parser(struct vkd3d_shader_parser *parser) +{ + return CONTAINING_RECORD(parser, struct vkd3d_shader_sm4_parser, p); +} +/* Return true when the shader version is 5.1 or later. Major and minor are compared as a pair, so a later major version qualifies whatever its minor version is. */ +static bool shader_is_sm_5_1(const struct vkd3d_shader_sm4_parser *sm4) +{ + const struct vkd3d_shader_version *version = &sm4->p.shader_version; + + return version->major > 5 || (version->major == 5 && version->minor >= 1); +} + +static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param); +static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, + const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param); +/* Read the register space token carried by SM 5.1+ declarations. Stores 0 and returns true without consuming anything for earlier versions; returns false when no token is left before "end". */ +static bool shader_sm4_read_register_space(struct vkd3d_shader_sm4_parser *priv, + const uint32_t **ptr, const uint32_t *end, unsigned int *register_space) +{ + *register_space = 0; + + if (!shader_is_sm_5_1(priv)) + return true; + + if (*ptr >= end) + { + WARN("Invalid ptr %p >= end %p.\n", *ptr, end); + return false; + } + + *register_space = *(*ptr)++; + return true; +} +/* Opcode reader for conditional instructions: reads the condition operand into src[0] and sets ins->flags to the NZ or Z test according to the VKD3D_SM4_CONDITIONAL_NZ bit of the opcode token. */ +static void shader_sm4_read_conditional_op(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_src_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_UINT, + (struct vkd3d_shader_src_param *)&ins->src[0]); + ins->flags = (opcode_token & VKD3D_SM4_CONDITIONAL_NZ) ? 
+ VKD3D_SHADER_CONDITIONAL_OP_NZ : VKD3D_SHADER_CONDITIONAL_OP_Z; +} +/* Opcode reader for shader data blocks. Only immediate constant buffers are handled: the tokens after the first one are copied into a newly allocated buffer, which is handed to shader_instruction_array_add_icb() and attached to the declaration. Any other data type turns the instruction into a NOP; a payload whose size is not a multiple of 4 tokens, or an allocation failure, marks the instruction invalid. */ +static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_immediate_constant_buffer *icb; + enum vkd3d_sm4_shader_data_type type; + unsigned int icb_size; + + type = (opcode_token & VKD3D_SM4_SHADER_DATA_TYPE_MASK) >> VKD3D_SM4_SHADER_DATA_TYPE_SHIFT; + if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) + { + FIXME("Ignoring shader data type %#x.\n", type); + ins->handler_idx = VKD3DSIH_NOP; + return; + } +/* Skip the first token; the remaining token_count - 1 tokens are the constant data. */ + ++tokens; + icb_size = token_count - 1; + if (icb_size % 4) + { + FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } +/* The allocation covers the structure up to and including icb_size elements of its data array. */ + if (!(icb = vkd3d_malloc(offsetof(struct vkd3d_shader_immediate_constant_buffer, data[icb_size])))) + { + ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); + ins->handler_idx = VKD3DSIH_INVALID; + return; + } + icb->vec4_count = icb_size / 4; + memcpy(icb->data, tokens, sizeof(*tokens) * icb_size); + shader_instruction_array_add_icb(&priv->p.instructions, icb); + ins->declaration.icb = icb; +} +/* Fill in the first and last register of a descriptor range from the register's indices: idx[1] gives the first register, and the last one comes from idx[2] for SM 5.1+ or from idx[1] otherwise. A parser error is reported when last is less than first. */ +static void shader_sm4_set_descriptor_register_range(struct vkd3d_shader_sm4_parser *sm4, + const struct vkd3d_shader_register *reg, struct vkd3d_shader_register_range *range) +{ + range->first = reg->idx[1].offset; + range->last = reg->idx[shader_is_sm_5_1(sm4) ? 
2 : 1].offset; + if (range->last < range->first) + { + FIXME("Invalid register range [%u:%u].\n", range->first, range->last); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE, + "Last register %u must not be less than first register %u in range.", range->last, range->first); + } +} +/* Opcode reader for resource and UAV declarations: decodes the resource type (and, for the two multisampled 2D texture types, the sample count) from the opcode token, reads the declared register and its descriptor range, then the per-component data types, the UAV flags when the register is read as a UAV, and finally the register space. */ +static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; + enum vkd3d_sm4_resource_type resource_type; + const uint32_t *end = &tokens[token_count]; + enum vkd3d_sm4_data_type data_type; + enum vkd3d_data_type reg_data_type; + DWORD components; + unsigned int i; +/* A type value of zero, or one beyond the lookup table, is reported and mapped to VKD3D_SHADER_RESOURCE_NONE. */ + resource_type = (opcode_token & VKD3D_SM4_RESOURCE_TYPE_MASK) >> VKD3D_SM4_RESOURCE_TYPE_SHIFT; + if (!resource_type || (resource_type >= ARRAY_SIZE(resource_type_table))) + { + FIXME("Unhandled resource type %#x.\n", resource_type); + semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE; + } + else + { + semantic->resource_type = resource_type_table[resource_type]; + } +/* sample_count is only written for the two multisampled resource types. */ + if (semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS + || semantic->resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) + { + semantic->sample_count = (opcode_token & VKD3D_SM4_RESOURCE_SAMPLE_COUNT_MASK) + >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + + reg_data_type = opcode == VKD3D_SM4_OP_DCL_RESOURCE ? 
VKD3D_DATA_RESOURCE : VKD3D_DATA_UAV; + shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); + shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); +/* The next token packs one 4-bit data type per component; zero or out-of-table values are reported and fall back to VKD3D_DATA_FLOAT. */ + components = *tokens++; + for (i = 0; i < VKD3D_VEC4_SIZE; i++) + { + data_type = VKD3D_SM4_TYPE_COMPONENT(components, i); + + if (!data_type || (data_type >= ARRAY_SIZE(data_type_table))) + { + FIXME("Unhandled data type %#x.\n", data_type); + semantic->resource_data_type[i] = VKD3D_DATA_FLOAT; + } + else + { + semantic->resource_data_type[i] = data_type_table[data_type]; + } + } + + if (reg_data_type == VKD3D_DATA_UAV) + ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; + + shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); +} +/* Opcode reader for constant buffer declarations. The size initially comes from idx[2] of the register and the space is 0; for SM 5.1+ the size is instead taken from a separate token, which is followed by the register space. VKD3DSI_INDEXED_DYNAMIC is set when the index type bit of the opcode token is set. */ +static void shader_sm4_read_dcl_constant_buffer(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + const uint32_t *end = &tokens[token_count]; + + shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_FLOAT, &ins->declaration.cb.src); + shader_sm4_set_descriptor_register_range(priv, &ins->declaration.cb.src.reg, &ins->declaration.cb.range); + if (opcode_token & VKD3D_SM4_INDEX_TYPE_MASK) + ins->flags |= VKD3DSI_INDEXED_DYNAMIC; + + ins->declaration.cb.size = ins->declaration.cb.src.reg.idx[2].offset; + ins->declaration.cb.range.space = 0; + + if (shader_is_sm_5_1(priv)) + { + if (tokens >= end) + { + FIXME("Invalid ptr %p >= end %p.\n", tokens, end); + return; + } + + ins->declaration.cb.size = *tokens++; + shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.cb.range.space); + } +} +/* Opcode reader for sampler declarations: stores the sampler mode bits in ins->flags (any bit other than the comparison bit is reported as unhandled), then reads the register, its descriptor range and the register space. */ +static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + 
const uint32_t *end = &tokens[token_count]; + + ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; + if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) + FIXME("Unhandled sampler mode %#x.\n", ins->flags); + shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_SAMPLER, &ins->declaration.sampler.src); + shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); + shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); +} +/* Return true while the parser's current phase is an HS fork phase or an HS join phase. */ +static bool sm4_parser_is_in_fork_or_join_phase(const struct vkd3d_shader_sm4_parser *sm4) +{ + return sm4->phase == VKD3DSIH_HS_FORK_PHASE || sm4->phase == VKD3DSIH_HS_JOIN_PHASE; +} +/* Opcode reader for index range declarations. Reads the base register and the register count, selects the mask array and range table matching the register type (output registers declared during a fork or join phase use the patch constant tables), rejects a range that overlaps an earlier range sharing a write mask bit or that does not fit the fixed-size tables, records the range, and finally checks that every register in it was declared with the range's write mask. */ +static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_index_range *index_range = &ins->declaration.index_range; + unsigned int i, register_idx, register_count, write_mask; + enum vkd3d_shader_register_type type; + struct sm4_index_range_array *ranges; + unsigned int *io_masks; + + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_OPAQUE, + &index_range->dst); + index_range->register_count = *tokens; + + register_idx = index_range->dst.reg.idx[index_range->dst.reg.idx_count - 1].offset; + register_count = index_range->register_count; + write_mask = index_range->dst.write_mask; + + if (vkd3d_write_mask_component_count(write_mask) != 1) + { + WARN("Unhandled write mask %#x.\n", write_mask); + vkd3d_shader_parser_warning(&priv->p, VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK, + "Index range mask %#x is not scalar.", write_mask); + } + + switch ((type = index_range->dst.reg.type)) + { + case VKD3DSPR_INPUT: + case VKD3DSPR_INCONTROLPOINT: + io_masks = priv->input_register_masks; + ranges = &priv->input_index_ranges; + break; + 
case VKD3DSPR_OUTPUT: + if (sm4_parser_is_in_fork_or_join_phase(priv)) + { + io_masks = priv->patch_constant_register_masks; + ranges = &priv->patch_constant_index_ranges; + } + else + { + io_masks = priv->output_register_masks; + ranges = &priv->output_index_ranges; + } + break; + case VKD3DSPR_COLOROUT: + case VKD3DSPR_OUTCONTROLPOINT: + io_masks = priv->output_register_masks; + ranges = &priv->output_index_ranges; + break; + case VKD3DSPR_PATCHCONST: + io_masks = priv->patch_constant_register_masks; + ranges = &priv->patch_constant_index_ranges; + break; + + default: + WARN("Unhandled register type %#x.\n", type); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, + "Invalid register type %#x for index range base %u, count %u, mask %#x.", + type, register_idx, register_count, write_mask); + return; + } +/* Look for an overlap with any previously recorded range that shares a write mask bit. */ + for (i = 0; i < ranges->count; ++i) + { + struct sm4_index_range r = ranges->ranges[i]; + + if (!(r.mask & write_mask)) + continue; + /* Ranges with the same base but different lengths are not an issue. 
*/ + if (register_idx == r.index) + continue; + + if ((r.index <= register_idx && register_idx - r.index < r.count) + || (register_idx < r.index && r.index - register_idx < register_count)) + { + WARN("Detected index range collision for base %u, count %u, mask %#x.\n", + register_idx, register_count, write_mask); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, + "Register index range base %u, count %u, mask %#x collides with a previous declaration.", + register_idx, register_count, write_mask); + return; + } + } + + /* The range table and the per-register mask arrays (MAX_REG_OUTPUT entries each) are fixed-size, and same-base ranges are never rejected above, so refuse a declaration that would be stored or looked up past the end of either. */ + if (ranges->count >= ARRAY_SIZE(ranges->ranges) || register_idx >= MAX_REG_OUTPUT + || register_count > MAX_REG_OUTPUT - register_idx) + { + WARN("Index range base %u, count %u, mask %#x is out of bounds.\n", + register_idx, register_count, write_mask); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, + "Register index range base %u, count %u, mask %#x exceeds the supported register or range count.", + register_idx, register_count, write_mask); + return; + } + + ranges->ranges[ranges->count].index = register_idx; + ranges->ranges[ranges->count].count = register_count; + ranges->ranges[ranges->count++].mask = write_mask; +/* Every register covered by the range must have all bits of the write mask set in its recorded mask. */ + for (i = 0; i < register_count; ++i) + { + if ((io_masks[register_idx + i] & write_mask) != write_mask) + { + WARN("No matching declaration for index range base %u, count %u, mask %#x.\n", + register_idx, register_count, write_mask); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL, + "Input/output registers matching index range base %u, count %u, mask %#x were not declared.", + register_idx, register_count, write_mask); + return; + } + } +} +/* Opcode reader for the output topology declaration: maps the primitive type field of the opcode token through output_primitive_type_table and reports values that are beyond the table or map to VKD3D_PT_UNDEFINED. */ +static void shader_sm4_read_dcl_output_topology(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + enum vkd3d_sm4_output_primitive_type primitive_type; + + primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; + if (primitive_type >= ARRAY_SIZE(output_primitive_type_table)) + ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; + else + ins->declaration.primitive_type.type = output_primitive_type_table[primitive_type]; + + if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) + FIXME("Unhandled output primitive type %#x.\n", primitive_type); +} +/* Opcode reader for the input primitive declaration. Patch primitive values yield VKD3D_PT_PATCH with the patch vertex count derived from the value; other values are mapped through input_primitive_type_table, and unknown ones are reported. */ +static void shader_sm4_read_dcl_input_primitive(struct 
vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + enum vkd3d_sm4_input_primitive_type primitive_type; +/* Values from PATCH1 to PATCH32 encode the patch vertex count as an offset from PATCH1; anything else goes through the lookup table. */ + primitive_type = (opcode_token & VKD3D_SM4_PRIMITIVE_TYPE_MASK) >> VKD3D_SM4_PRIMITIVE_TYPE_SHIFT; + if (VKD3D_SM5_INPUT_PT_PATCH1 <= primitive_type && primitive_type <= VKD3D_SM5_INPUT_PT_PATCH32) + { + ins->declaration.primitive_type.type = VKD3D_PT_PATCH; + ins->declaration.primitive_type.patch_vertex_count = primitive_type - VKD3D_SM5_INPUT_PT_PATCH1 + 1; + } + else if (primitive_type >= ARRAY_SIZE(input_primitive_type_table)) + { + ins->declaration.primitive_type.type = VKD3D_PT_UNDEFINED; + } + else + { + ins->declaration.primitive_type.type = input_primitive_type_table[primitive_type]; + } + + if (ins->declaration.primitive_type.type == VKD3D_PT_UNDEFINED) + FIXME("Unhandled input primitive type %#x.\n", primitive_type); +} +/* Opcode reader that stores the first operand token as the declaration's count. */ +static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.count = *tokens; +} +/* Opcode reader that parses a single destination operand into the declaration. */ +static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); +} +/* Opcode reader that parses a destination operand and stores the token following it as the register's system value semantic. */ +static void shader_sm4_read_declaration_register_semantic(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, + &ins->declaration.register_semantic.reg); + ins->declaration.register_semantic.sysval_semantic = *tokens; +} +/* Opcode reader for pixel shader input declarations: ins->flags receives the interpolation mode field of the opcode token, then the destination operand is read. */ +static void 
shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.dst); +} +/* As shader_sm4_read_dcl_input_ps(), except that the operand is read into the register semantic and is followed by a system value semantic token. */ +static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM4_INTERPOLATION_MODE_MASK) >> VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, + &ins->declaration.register_semantic.reg); + ins->declaration.register_semantic.sysval_semantic = *tokens; +} +/* Opcode reader for indexable temp declarations: three consecutive tokens give the register index, the register size and the component count. */ +static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.indexable_temp.register_idx = *tokens++; + ins->declaration.indexable_temp.register_size = *tokens++; + ins->declaration.indexable_temp.component_count = *tokens; +} +/* Opcode reader that copies the global flags field of the opcode token into ins->flags. */ +static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; +} +/* Opcode reader for fcall: the first token is the function body index and the source operand follows it. The end of the instruction is computed before that first token is consumed, so the operand reader is bounded by the instruction itself rather than by one token past it. */ +static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + struct vkd3d_shader_src_param *src_params = (struct vkd3d_shader_src_param *)ins->src; + const uint32_t *end = &tokens[token_count]; + + src_params[0].reg.u.fp_body_idx = *tokens++; + 
shader_sm4_read_src_param(priv, &tokens, end, VKD3D_DATA_OPAQUE, &src_params[0]); +} +/* Opcode reader that stores the first token as the declaration index. */ +static void shader_sm5_read_dcl_function_body(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.index = *tokens; +} +/* Opcode reader for function table declarations: stores the first token as the declaration index; the following token is only logged as the body count and the function bodies themselves are ignored. */ +static void shader_sm5_read_dcl_function_table(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.index = *tokens++; + FIXME("Ignoring set of function bodies (count %u).\n", *tokens); +} +/* Opcode reader for interface declarations: reads the index and the body count, then splits the next token into the array size (bits 16 and up) and the table count (low 16 bits). The function tables themselves are ignored. */ +static void shader_sm5_read_dcl_interface(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.fp.index = *tokens++; + ins->declaration.fp.body_count = *tokens++; + ins->declaration.fp.array_size = *tokens >> VKD3D_SM5_FP_ARRAY_SIZE_SHIFT; + ins->declaration.fp.table_count = *tokens++ & VKD3D_SM5_FP_TABLE_COUNT_MASK; + FIXME("Ignoring set of function tables (count %u).\n", ins->declaration.fp.table_count); +} +/* Opcode reader that extracts the control point count field of the opcode token. */ +static void shader_sm5_read_control_point_count(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.count = (opcode_token & VKD3D_SM5_CONTROL_POINT_COUNT_MASK) + >> VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT; +} +/* Opcode reader that extracts the tessellator domain from the tessellator field of the opcode token. */ +static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instruction *ins, uint32_t opcode, + uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) +{ + ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; +} +/* Opcode reader that extracts the tessellator partitioning from the tessellator field of the opcode token. */ +static void 
shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK)
+            >> VKD3D_SM5_TESSELLATOR_SHIFT;
+}
+
+/* The output primitive is encoded in the opcode token, using the same
+ * mask and shift as the other tessellator declarations. */
+static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK)
+            >> VKD3D_SM5_TESSELLATOR_SHIFT;
+}
+
+/* The operand token holds the raw bits of a float. The bits are copied out
+ * with memcpy() rather than by dereferencing a float pointer cast from the
+ * const uint32_t token stream: that cast read uint32_t storage through a
+ * float lvalue and dropped the const qualifier. */
+static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    float max_tessfactor;
+
+    memcpy(&max_tessfactor, tokens, sizeof(max_tessfactor));
+    ins->declaration.max_tessellation_factor = max_tessfactor;
+}
+
+/* Reads three consecutive operand tokens as the x, y and z thread group size.
+ * NOTE(review): token_count is not consulted before these reads. */
+static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->declaration.thread_group_size.x = *tokens++;
+    ins->declaration.thread_group_size.y = *tokens++;
+    ins->declaration.thread_group_size.z = *tokens++;
+}
+
+/* Raw UAV declaration: the operand is parsed as a UAV destination register,
+ * the descriptor range is derived from that register, the UAV flags come from
+ * the opcode token, and the register space is read last. */
+static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token,
+        const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource;
+    const uint32_t *end = &tokens[token_count];
+
+    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg);
+    shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range);
+    ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >>
VKD3D_SM5_UAV_FLAGS_SHIFT;
+    shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space);
+}
+/* Structured UAV declaration: parses the UAV destination register, derives the
+ * descriptor range from it, takes the UAV flags from the opcode token, reads
+ * one token as the byte stride and finally the register space. A stride that
+ * is not a multiple of 4 is only logged. */
+static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource;
+    const uint32_t *end = &tokens[token_count];
+
+    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_UAV, &resource->resource.reg);
+    shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range);
+    ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT;
+    resource->byte_stride = *tokens++;
+    if (resource->byte_stride % 4)
+        FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride);
+    shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space);
+}
+/* Raw TGSM declaration: parses the destination register, then stores the
+ * following token as the byte count. A count that is not a multiple of 4 is
+ * only logged. */
+static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, &ins->declaration.tgsm_raw.reg);
+    ins->declaration.tgsm_raw.byte_count = *tokens;
+    if (ins->declaration.tgsm_raw.byte_count % 4)
+        FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count);
+}
+/* Structured TGSM declaration: parses the destination register, then reads two
+ * tokens as the byte stride and the structure count. A stride that is not a
+ * multiple of 4 is only logged. */
+static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT,
+            &ins->declaration.tgsm_structured.reg);
+    ins->declaration.tgsm_structured.byte_stride = *tokens++;
+    ins->declaration.tgsm_structured.structure_count = *tokens;
+    if (ins->declaration.tgsm_structured.byte_stride % 4)
+        FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride);
+}
+/* Structured resource declaration: same layout as the structured UAV reader
+ * (register, byte stride token, register space) but parsed with
+ * VKD3D_DATA_RESOURCE and without flags from the opcode token. */
+static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource;
+    const uint32_t *end = &tokens[token_count];
+
+    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg);
+    shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range);
+    resource->byte_stride = *tokens++;
+    if (resource->byte_stride % 4)
+        FIXME("Byte stride %u is not multiple of 4.\n", resource->byte_stride);
+    shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space);
+}
+/* Raw resource declaration: parses the resource register, derives the
+ * descriptor range from it and reads the register space. */
+static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode,
+        uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource;
+    const uint32_t *end = &tokens[token_count];
+
+    shader_sm4_read_dst_param(priv, &tokens, end, VKD3D_DATA_RESOURCE, &resource->resource.reg);
+    shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range);
+    shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space);
+}
+/* The sync flags are encoded in the opcode token; no operand tokens are read. */
+static void shader_sm5_read_sync(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token,
+        const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv)
+{
+    ins->flags = (opcode_token & VKD3D_SM5_SYNC_FLAGS_MASK) >> VKD3D_SM5_SYNC_FLAGS_SHIFT;
+}
+
+/*
+ * Operand data type letters used in the two strings of each opcode_table
+ * entry below. The first string presumably describes the destination
+ * operands and the second the source operands, one letter per operand --
+ * confirm against struct vkd3d_sm4_opcode_info. Entries with a fifth member
+ * name a dedicated reader function for that opcode.
+ *
+ * d -> VKD3D_DATA_DOUBLE
+ * f -> VKD3D_DATA_FLOAT
+ * i -> VKD3D_DATA_INT
+ * u -> VKD3D_DATA_UINT
+ * O -> VKD3D_DATA_OPAQUE
+ * R -> VKD3D_DATA_RESOURCE
+ * S -> VKD3D_DATA_SAMPLER
+ * U -> VKD3D_DATA_UAV
+ */
+static const struct vkd3d_sm4_opcode_info opcode_table[] =
+{
+    {VKD3D_SM4_OP_ADD, VKD3DSIH_ADD, "f", "ff"},
+    {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"},
+    {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""},
+    {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u",
+            shader_sm4_read_conditional_op},
+    {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u"},
+    {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""},
+    {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u",
+            shader_sm4_read_conditional_op},
+    {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""},
+    {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""},
+    {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"},
+    {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"},
+    {VKD3D_SM4_OP_DISCARD, VKD3DSIH_DISCARD, "", "u",
+            shader_sm4_read_conditional_op},
+    {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"},
+    {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"},
+    {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"},
+    {VKD3D_SM4_OP_DP4, VKD3DSIH_DP4, "f", "ff"},
+    {VKD3D_SM4_OP_ELSE, VKD3DSIH_ELSE, "", ""},
+    {VKD3D_SM4_OP_EMIT, VKD3DSIH_EMIT, "", ""},
+    {VKD3D_SM4_OP_ENDIF, VKD3DSIH_ENDIF, "", ""},
+    {VKD3D_SM4_OP_ENDLOOP, VKD3DSIH_ENDLOOP, "", ""},
+    {VKD3D_SM4_OP_ENDSWITCH, VKD3DSIH_ENDSWITCH, "", ""},
+    {VKD3D_SM4_OP_EQ, VKD3DSIH_EQ, "u", "ff"},
+    {VKD3D_SM4_OP_EXP, VKD3DSIH_EXP, "f", "f"},
+    {VKD3D_SM4_OP_FRC, VKD3DSIH_FRC, "f", "f"},
+    {VKD3D_SM4_OP_FTOI, VKD3DSIH_FTOI, "i", "f"},
+    {VKD3D_SM4_OP_FTOU, VKD3DSIH_FTOU, "u", "f"},
+    {VKD3D_SM4_OP_GE, VKD3DSIH_GE, "u", "ff"},
+    {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"},
+    {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u",
+            shader_sm4_read_conditional_op},
+    {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"},
+    {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"},
+    {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"},
+    {VKD3D_SM4_OP_IMAD, VKD3DSIH_IMAD, "i", "iii"},
+    {VKD3D_SM4_OP_IMAX, VKD3DSIH_IMAX, "i", "ii"},
+    {VKD3D_SM4_OP_IMIN, VKD3DSIH_IMIN,
"i", "ii"}, + {VKD3D_SM4_OP_IMUL, VKD3DSIH_IMUL, "ii", "ii"}, + {VKD3D_SM4_OP_INE, VKD3DSIH_INE, "u", "ii"}, + {VKD3D_SM4_OP_INEG, VKD3DSIH_INEG, "i", "i"}, + {VKD3D_SM4_OP_ISHL, VKD3DSIH_ISHL, "i", "ii"}, + {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, + {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, + {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, + {VKD3D_SM4_OP_LD, VKD3DSIH_LD, "u", "iR"}, + {VKD3D_SM4_OP_LD2DMS, VKD3DSIH_LD2DMS, "u", "iRi"}, + {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, + {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, + {VKD3D_SM4_OP_LT, VKD3DSIH_LT, "u", "ff"}, + {VKD3D_SM4_OP_MAD, VKD3DSIH_MAD, "f", "fff"}, + {VKD3D_SM4_OP_MIN, VKD3DSIH_MIN, "f", "ff"}, + {VKD3D_SM4_OP_MAX, VKD3DSIH_MAX, "f", "ff"}, + {VKD3D_SM4_OP_SHADER_DATA, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER, "", "", + shader_sm4_read_shader_data}, + {VKD3D_SM4_OP_MOV, VKD3DSIH_MOV, "f", "f"}, + {VKD3D_SM4_OP_MOVC, VKD3DSIH_MOVC, "f", "uff"}, + {VKD3D_SM4_OP_MUL, VKD3DSIH_MUL, "f", "ff"}, + {VKD3D_SM4_OP_NE, VKD3DSIH_NE, "u", "ff"}, + {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, + {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, + {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, + {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "iR"}, + {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, + {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", + shader_sm4_read_conditional_op}, + {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, + {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, + {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, + {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, + {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, + {VKD3D_SM4_OP_SAMPLE, VKD3DSIH_SAMPLE, "u", "fRS"}, + {VKD3D_SM4_OP_SAMPLE_C, VKD3DSIH_SAMPLE_C, "f", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3DSIH_SAMPLE_C_LZ, "f", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_LOD, VKD3DSIH_SAMPLE_LOD, "u", "fRSf"}, + {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3DSIH_SAMPLE_GRAD, "u", "fRSff"}, + {VKD3D_SM4_OP_SAMPLE_B, VKD3DSIH_SAMPLE_B, "u", "fRSf"}, + {VKD3D_SM4_OP_SQRT, 
VKD3DSIH_SQRT, "f", "f"}, + {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, + {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, + {VKD3D_SM4_OP_UDIV, VKD3DSIH_UDIV, "uu", "uu"}, + {VKD3D_SM4_OP_ULT, VKD3DSIH_ULT, "u", "uu"}, + {VKD3D_SM4_OP_UGE, VKD3DSIH_UGE, "u", "uu"}, + {VKD3D_SM4_OP_UMUL, VKD3DSIH_UMUL, "uu", "uu"}, + {VKD3D_SM4_OP_UMAX, VKD3DSIH_UMAX, "u", "uu"}, + {VKD3D_SM4_OP_UMIN, VKD3DSIH_UMIN, "u", "uu"}, + {VKD3D_SM4_OP_USHR, VKD3DSIH_USHR, "u", "uu"}, + {VKD3D_SM4_OP_UTOF, VKD3DSIH_UTOF, "f", "u"}, + {VKD3D_SM4_OP_XOR, VKD3DSIH_XOR, "u", "uu"}, + {VKD3D_SM4_OP_DCL_RESOURCE, VKD3DSIH_DCL, "", "", + shader_sm4_read_dcl_resource}, + {VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, VKD3DSIH_DCL_CONSTANT_BUFFER, "", "", + shader_sm4_read_dcl_constant_buffer}, + {VKD3D_SM4_OP_DCL_SAMPLER, VKD3DSIH_DCL_SAMPLER, "", "", + shader_sm4_read_dcl_sampler}, + {VKD3D_SM4_OP_DCL_INDEX_RANGE, VKD3DSIH_DCL_INDEX_RANGE, "", "", + shader_sm4_read_dcl_index_range}, + {VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY, VKD3DSIH_DCL_OUTPUT_TOPOLOGY, "", "", + shader_sm4_read_dcl_output_topology}, + {VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE, VKD3DSIH_DCL_INPUT_PRIMITIVE, "", "", + shader_sm4_read_dcl_input_primitive}, + {VKD3D_SM4_OP_DCL_VERTICES_OUT, VKD3DSIH_DCL_VERTICES_OUT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM4_OP_DCL_INPUT, VKD3DSIH_DCL_INPUT, "", "", + shader_sm4_read_declaration_dst}, + {VKD3D_SM4_OP_DCL_INPUT_SGV, VKD3DSIH_DCL_INPUT_SGV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_SIV, VKD3DSIH_DCL_INPUT_SIV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_PS, VKD3DSIH_DCL_INPUT_PS, "", "", + shader_sm4_read_dcl_input_ps}, + {VKD3D_SM4_OP_DCL_INPUT_PS_SGV, VKD3DSIH_DCL_INPUT_PS_SGV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_INPUT_PS_SIV, VKD3DSIH_DCL_INPUT_PS_SIV, "", "", + shader_sm4_read_dcl_input_ps_siv}, + {VKD3D_SM4_OP_DCL_OUTPUT, VKD3DSIH_DCL_OUTPUT, "", "", + 
shader_sm4_read_declaration_dst}, + {VKD3D_SM4_OP_DCL_OUTPUT_SIV, VKD3DSIH_DCL_OUTPUT_SIV, "", "", + shader_sm4_read_declaration_register_semantic}, + {VKD3D_SM4_OP_DCL_TEMPS, VKD3DSIH_DCL_TEMPS, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, VKD3DSIH_DCL_INDEXABLE_TEMP, "", "", + shader_sm4_read_dcl_indexable_temp}, + {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", + shader_sm4_read_dcl_global_flags}, + {VKD3D_SM4_OP_LOD, VKD3DSIH_LOD, "f", "fRS"}, + {VKD3D_SM4_OP_GATHER4, VKD3DSIH_GATHER4, "u", "fRS"}, + {VKD3D_SM4_OP_SAMPLE_POS, VKD3DSIH_SAMPLE_POS, "f", "Ru"}, + {VKD3D_SM4_OP_SAMPLE_INFO, VKD3DSIH_SAMPLE_INFO, "f", "R"}, + {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, + {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, + {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, + {VKD3D_SM5_OP_HS_JOIN_PHASE, VKD3DSIH_HS_JOIN_PHASE, "", ""}, + {VKD3D_SM5_OP_EMIT_STREAM, VKD3DSIH_EMIT_STREAM, "", "f"}, + {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, + {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", + shader_sm5_read_fcall}, + {VKD3D_SM5_OP_BUFINFO, VKD3DSIH_BUFINFO, "i", "U"}, + {VKD3D_SM5_OP_DERIV_RTX_COARSE, VKD3DSIH_DSX_COARSE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTX_FINE, VKD3DSIH_DSX_FINE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTY_COARSE, VKD3DSIH_DSY_COARSE, "f", "f"}, + {VKD3D_SM5_OP_DERIV_RTY_FINE, VKD3DSIH_DSY_FINE, "f", "f"}, + {VKD3D_SM5_OP_GATHER4_C, VKD3DSIH_GATHER4_C, "f", "fRSf"}, + {VKD3D_SM5_OP_GATHER4_PO, VKD3DSIH_GATHER4_PO, "f", "fiRS"}, + {VKD3D_SM5_OP_GATHER4_PO_C, VKD3DSIH_GATHER4_PO_C, "f", "fiRSf"}, + {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, + {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, + {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, + {VKD3D_SM5_OP_COUNTBITS, VKD3DSIH_COUNTBITS, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_HI, VKD3DSIH_FIRSTBIT_HI, "u", "u"}, + {VKD3D_SM5_OP_FIRSTBIT_LO, VKD3DSIH_FIRSTBIT_LO, "u", "u"}, + 
{VKD3D_SM5_OP_FIRSTBIT_SHI, VKD3DSIH_FIRSTBIT_SHI, "u", "i"}, + {VKD3D_SM5_OP_UBFE, VKD3DSIH_UBFE, "u", "iiu"}, + {VKD3D_SM5_OP_IBFE, VKD3DSIH_IBFE, "i", "iii"}, + {VKD3D_SM5_OP_BFI, VKD3DSIH_BFI, "u", "iiuu"}, + {VKD3D_SM5_OP_BFREV, VKD3DSIH_BFREV, "u", "u"}, + {VKD3D_SM5_OP_SWAPC, VKD3DSIH_SWAPC, "ff", "uff"}, + {VKD3D_SM5_OP_DCL_STREAM, VKD3DSIH_DCL_STREAM, "", "O"}, + {VKD3D_SM5_OP_DCL_FUNCTION_BODY, VKD3DSIH_DCL_FUNCTION_BODY, "", "", + shader_sm5_read_dcl_function_body}, + {VKD3D_SM5_OP_DCL_FUNCTION_TABLE, VKD3DSIH_DCL_FUNCTION_TABLE, "", "", + shader_sm5_read_dcl_function_table}, + {VKD3D_SM5_OP_DCL_INTERFACE, VKD3DSIH_DCL_INTERFACE, "", "", + shader_sm5_read_dcl_interface}, + {VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT, "", "", + shader_sm5_read_control_point_count}, + {VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, "", "", + shader_sm5_read_control_point_count}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, VKD3DSIH_DCL_TESSELLATOR_DOMAIN, "", "", + shader_sm5_read_dcl_tessellator_domain}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING, "", "", + shader_sm5_read_dcl_tessellator_partitioning}, + {VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, "", "", + shader_sm5_read_dcl_tessellator_output_primitive}, + {VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR, VKD3DSIH_DCL_HS_MAX_TESSFACTOR, "", "", + shader_sm5_read_dcl_hs_max_tessfactor}, + {VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DCL_THREAD_GROUP, VKD3DSIH_DCL_THREAD_GROUP, "", "", + shader_sm5_read_dcl_thread_group}, + {VKD3D_SM5_OP_DCL_UAV_TYPED, VKD3DSIH_DCL_UAV_TYPED, "", "", + shader_sm4_read_dcl_resource}, + 
{VKD3D_SM5_OP_DCL_UAV_RAW, VKD3DSIH_DCL_UAV_RAW, "", "", + shader_sm5_read_dcl_uav_raw}, + {VKD3D_SM5_OP_DCL_UAV_STRUCTURED, VKD3DSIH_DCL_UAV_STRUCTURED, "", "", + shader_sm5_read_dcl_uav_structured}, + {VKD3D_SM5_OP_DCL_TGSM_RAW, VKD3DSIH_DCL_TGSM_RAW, "", "", + shader_sm5_read_dcl_tgsm_raw}, + {VKD3D_SM5_OP_DCL_TGSM_STRUCTURED, VKD3DSIH_DCL_TGSM_STRUCTURED, "", "", + shader_sm5_read_dcl_tgsm_structured}, + {VKD3D_SM5_OP_DCL_RESOURCE_RAW, VKD3DSIH_DCL_RESOURCE_RAW, "", "", + shader_sm5_read_dcl_resource_raw}, + {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", + shader_sm5_read_dcl_resource_structured}, + {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3DSIH_LD_UAV_TYPED, "u", "iU"}, + {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3DSIH_STORE_UAV_TYPED, "U", "iu"}, + {VKD3D_SM5_OP_LD_RAW, VKD3DSIH_LD_RAW, "u", "iU"}, + {VKD3D_SM5_OP_STORE_RAW, VKD3DSIH_STORE_RAW, "U", "uu"}, + {VKD3D_SM5_OP_LD_STRUCTURED, VKD3DSIH_LD_STRUCTURED, "u", "iiR"}, + {VKD3D_SM5_OP_STORE_STRUCTURED, VKD3DSIH_STORE_STRUCTURED, "U", "iiu"}, + {VKD3D_SM5_OP_ATOMIC_AND, VKD3DSIH_ATOMIC_AND, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_OR, VKD3DSIH_ATOMIC_OR, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_XOR, VKD3DSIH_ATOMIC_XOR, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_CMP_STORE, VKD3DSIH_ATOMIC_CMP_STORE, "U", "iuu"}, + {VKD3D_SM5_OP_ATOMIC_IADD, VKD3DSIH_ATOMIC_IADD, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_IMAX, VKD3DSIH_ATOMIC_IMAX, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_IMIN, VKD3DSIH_ATOMIC_IMIN, "U", "ii"}, + {VKD3D_SM5_OP_ATOMIC_UMAX, VKD3DSIH_ATOMIC_UMAX, "U", "iu"}, + {VKD3D_SM5_OP_ATOMIC_UMIN, VKD3DSIH_ATOMIC_UMIN, "U", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_ALLOC, VKD3DSIH_IMM_ATOMIC_ALLOC, "u", "U"}, + {VKD3D_SM5_OP_IMM_ATOMIC_CONSUME, VKD3DSIH_IMM_ATOMIC_CONSUME, "u", "U"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IADD, VKD3DSIH_IMM_ATOMIC_IADD, "uU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_AND, VKD3DSIH_IMM_ATOMIC_AND, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_OR, VKD3DSIH_IMM_ATOMIC_OR, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_XOR, 
VKD3DSIH_IMM_ATOMIC_XOR, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_EXCH, VKD3DSIH_IMM_ATOMIC_EXCH, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH, VKD3DSIH_IMM_ATOMIC_CMP_EXCH, "uU", "iuu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMAX, VKD3DSIH_IMM_ATOMIC_IMAX, "iU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_IMIN, VKD3DSIH_IMM_ATOMIC_IMIN, "iU", "ii"}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMAX, VKD3DSIH_IMM_ATOMIC_UMAX, "uU", "iu"}, + {VKD3D_SM5_OP_IMM_ATOMIC_UMIN, VKD3DSIH_IMM_ATOMIC_UMIN, "uU", "iu"}, + {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", + shader_sm5_read_sync}, + {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, + {VKD3D_SM5_OP_DMAX, VKD3DSIH_DMAX, "d", "dd"}, + {VKD3D_SM5_OP_DMIN, VKD3DSIH_DMIN, "d", "dd"}, + {VKD3D_SM5_OP_DMUL, VKD3DSIH_DMUL, "d", "dd"}, + {VKD3D_SM5_OP_DEQ, VKD3DSIH_DEQ, "u", "dd"}, + {VKD3D_SM5_OP_DGE, VKD3DSIH_DGE, "u", "dd"}, + {VKD3D_SM5_OP_DLT, VKD3DSIH_DLT, "u", "dd"}, + {VKD3D_SM5_OP_DNE, VKD3DSIH_DNE, "u", "dd"}, + {VKD3D_SM5_OP_DMOV, VKD3DSIH_DMOV, "d", "d"}, + {VKD3D_SM5_OP_DMOVC, VKD3DSIH_DMOVC, "d", "udd"}, + {VKD3D_SM5_OP_DTOF, VKD3DSIH_DTOF, "f", "d"}, + {VKD3D_SM5_OP_FTOD, VKD3DSIH_FTOD, "d", "f"}, + {VKD3D_SM5_OP_EVAL_SAMPLE_INDEX, VKD3DSIH_EVAL_SAMPLE_INDEX, "f", "fi"}, + {VKD3D_SM5_OP_EVAL_CENTROID, VKD3DSIH_EVAL_CENTROID, "f", "f"}, + {VKD3D_SM5_OP_DCL_GS_INSTANCES, VKD3DSIH_DCL_GS_INSTANCES, "", "", + shader_sm4_read_declaration_count}, + {VKD3D_SM5_OP_DDIV, VKD3DSIH_DDIV, "d", "dd"}, + {VKD3D_SM5_OP_DFMA, VKD3DSIH_DFMA, "d", "ddd"}, + {VKD3D_SM5_OP_DRCP, VKD3DSIH_DRCP, "d", "d"}, + {VKD3D_SM5_OP_MSAD, VKD3DSIH_MSAD, "u", "uuu"}, + {VKD3D_SM5_OP_DTOI, VKD3DSIH_DTOI, "i", "d"}, + {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, + {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, + {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, + {VKD3D_SM5_OP_GATHER4_S, VKD3DSIH_GATHER4_S, "uu", "fRS"}, + {VKD3D_SM5_OP_GATHER4_C_S, VKD3DSIH_GATHER4_C_S, "fu", "fRSf"}, + {VKD3D_SM5_OP_GATHER4_PO_S, VKD3DSIH_GATHER4_PO_S, "fu", "fiRS"}, + {VKD3D_SM5_OP_GATHER4_PO_C_S, 
VKD3DSIH_GATHER4_PO_C_S, "fu", "fiRSf"},
+    {VKD3D_SM5_OP_LD_S, VKD3DSIH_LD_S, "uu", "iR"},
+    {VKD3D_SM5_OP_LD2DMS_S, VKD3DSIH_LD2DMS_S, "uu", "iRi"},
+    {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3DSIH_LD_UAV_TYPED_S, "uu", "iU"},
+    {VKD3D_SM5_OP_LD_RAW_S, VKD3DSIH_LD_RAW_S, "uu", "iU"},
+    {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3DSIH_LD_STRUCTURED_S, "uu", "iiR"},
+    {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3DSIH_SAMPLE_LOD_S, "uu", "fRSf"},
+    {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3DSIH_SAMPLE_C_LZ_S, "fu", "fRSf"},
+    {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3DSIH_SAMPLE_CL_S, "uu", "fRSf"},
+    {VKD3D_SM5_OP_SAMPLE_B_CL_S, VKD3DSIH_SAMPLE_B_CL_S, "uu", "fRSff"},
+    {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3DSIH_SAMPLE_GRAD_CL_S, "uu", "fRSfff"},
+    {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3DSIH_SAMPLE_C_CL_S, "fu", "fRSff"},
+    {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"},
+};
+/* Maps an SM4/SM5 register type value (used directly as the array index, see
+ * the per-slot comments) to the vkd3d register type. Slots holding ~0u have no
+ * mapping; shader_sm4_read_param() rejects entries equal to VKD3DSPR_INVALID,
+ * so this presumably relies on VKD3DSPR_INVALID being ~0u -- confirm. */
+static const enum vkd3d_shader_register_type register_type_table[] =
+{
+    /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP,
+    /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT,
+    /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT,
+    /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP,
+    /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST,
+    /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64,
+    /* VKD3D_SM4_RT_SAMPLER */ VKD3DSPR_SAMPLER,
+    /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE,
+    /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER,
+    /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER,
+    /* UNKNOWN */ ~0u,
+    /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID,
+    /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT,
+    /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL,
+    /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER,
+    /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK,
+    /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM,
+    /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY,
+    /* UNKNOWN */ ~0u,
+    /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER,
+    /* UNKNOWN */ ~0u,
+    /* UNKNOWN */ ~0u,
+    /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID,
+    /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID,
+    /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID,
+    /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT,
+    /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT,
+    /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST,
+    /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD,
+    /* UNKNOWN */ ~0u,
+    /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV,
+    /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM,
+    /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID,
+    /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID,
+    /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID,
+    /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE,
+    /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX,
+    /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID,
+    /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE,
+    /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE,
+    /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u,
+    /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF,
+};
+/* Maps the precision field of an extended operand token (used as the array
+ * index) to the vkd3d register precision; the slot without an SM4 name maps
+ * to VKD3D_SHADER_REGISTER_PRECISION_INVALID. */
+static const enum vkd3d_shader_register_precision register_precision_table[] =
+{
+    /* VKD3D_SM4_REGISTER_PRECISION_DEFAULT */ VKD3D_SHADER_REGISTER_PRECISION_DEFAULT,
+    /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_16,
+    /* VKD3D_SM4_REGISTER_PRECISION_MIN_FLOAT_10 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_FLOAT_10,
+    /* UNKNOWN */ VKD3D_SHADER_REGISTER_PRECISION_INVALID,
+    /* VKD3D_SM4_REGISTER_PRECISION_MIN_INT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_INT_16,
+    /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16,
+};
+/* Linear search of opcode_table for the given opcode. Returns the matching
+ * entry, or NULL when the opcode is not in the table. */
+static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode)
+{
+    unsigned int i;
+
+    for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i)
+    {
+        if (opcode == opcode_table[i].opcode) return &opcode_table[i];
+    }
+
+    return NULL;
+}
+/* For pixel shaders, turns a VKD3DSPR_OUTPUT register into VKD3DSPR_COLOROUT
+ * and replaces its first index with the entry of sm4->output_map for that
+ * index. An index outside output_map is left untouched. Other shader types
+ * and register types are not modified. */
+static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg)
+{
+    switch (sm4->p.shader_version.type)
+    {
+        case VKD3D_SHADER_TYPE_PIXEL:
+            if (reg->type == VKD3DSPR_OUTPUT)
+            {
+                unsigned int reg_idx = reg->idx[0].offset;
+
+                if (reg_idx >= ARRAY_SIZE(sm4->output_map))
+                {
+                    /* Validated later */
+                    break;
+                }
+
+                reg->type = VKD3DSPR_COLOROUT;
+                reg->idx[0].offset = sm4->output_map[reg_idx];
+            }
+            break;
+
+        default:
+            break;
+    }
+}
+/* Translates one operand type letter into the corresponding vkd3d data type.
+ * An unknown letter is logged as an error and treated as VKD3D_DATA_FLOAT. */
+static enum vkd3d_data_type map_data_type(char t)
+{
+    switch (t)
+    {
+        case 'd':
+            return VKD3D_DATA_DOUBLE;
+        case 'f':
+            return VKD3D_DATA_FLOAT;
+        case 'i':
+            return VKD3D_DATA_INT;
+        case 'u':
+            return VKD3D_DATA_UINT;
+        case 'O':
+            return VKD3D_DATA_OPAQUE;
+        case 'R':
+            return VKD3D_DATA_RESOURCE;
+        case 'S':
+            return VKD3D_DATA_SAMPLER;
+        case 'U':
+            return VKD3D_DATA_UAV;
+        default:
+            ERR("Invalid data type '%c'.\n", t);
+            return VKD3D_DATA_FLOAT;
+    }
+}
+/* Releases the parser: destroys its instruction array and shader description,
+ * then frees the enclosing SM4 parser object. */
+static void shader_sm4_destroy(struct vkd3d_shader_parser *parser)
+{
+    struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser);
+
+    shader_instruction_array_destroy(&parser->instructions);
+    free_shader_desc(&parser->shader_desc);
+    vkd3d_free(sm4);
+}
+/* Reads one register index at *ptr according to the addressing mode. With
+ * relative addressing, a source parameter is obtained from the parser, an
+ * immediate offset token is read only if VKD3D_SM4_ADDRESSING_OFFSET is set
+ * (otherwise the offset is 0), and the relative address operand is parsed into
+ * that parameter. Without it, a single token is read as the offset. Returns
+ * false only when no source parameter could be obtained.
+ * NOTE(review): the offset tokens are read without comparing *ptr to end, and
+ * the result of shader_sm4_read_src_param() is ignored. */
+static bool shader_sm4_read_reg_idx(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr,
+        const uint32_t *end, uint32_t addressing, struct vkd3d_shader_register_index *reg_idx)
+{
+    if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE)
+    {
+        struct vkd3d_shader_src_param *rel_addr = shader_parser_get_src_params(&priv->p, 1);
+
+        if (!(reg_idx->rel_addr = rel_addr))
+        {
+            ERR("Failed to get src param for relative addressing.\n");
+            return false;
+        }
+
+        if (addressing & VKD3D_SM4_ADDRESSING_OFFSET)
+            reg_idx->offset = *(*ptr)++;
+        else
+            reg_idx->offset = 0;
+        shader_sm4_read_src_param(priv, ptr, end, VKD3D_DATA_INT, rel_addr);
+    }
+    else
+    {
+        reg_idx->rel_addr = NULL;
+        reg_idx->offset = *(*ptr)++;
+    }
+
+    return true;
+}
+/* Returns true for the SM4 register types that name a descriptor: sampler,
+ * resource, constant buffer and UAV. */
+static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type
register_type)
+{
+    switch (register_type)
+    {
+        case VKD3D_SM4_RT_SAMPLER:
+        case VKD3D_SM4_RT_RESOURCE:
+        case VKD3D_SM4_RT_CONSTBUFFER:
+        case VKD3D_SM5_RT_UAV:
+            return true;
+
+        default:
+            return false;
+    }
+}
+
+/* Decodes one operand at *ptr into "param" and advances *ptr past it.
+ *
+ * - The first token supplies the register type (mapped through
+ *   register_type_table; an unknown type is logged and treated as
+ *   VKD3DSPR_TEMP), the index order and the addressing mode of each index.
+ * - An optional extended operand token supplies the source modifier (returned
+ *   through *modifier, VKD3DSPSM_NONE otherwise), the minimum precision and
+ *   the non-uniform flag.
+ * - Up to three register indices are read with shader_sm4_read_reg_idx();
+ *   unused index slots get offset ~0u and a NULL relative address.
+ * - Immediate constants copy their payload into param->u.immconst_uint.
+ * - For descriptor registers in shaders older than SM5.1 the indices are moved
+ *   up one slot so that the layout matches SM5.1.
+ *
+ * map_register() is applied last. Returns false when the data runs past "end",
+ * when an index cannot be read, or when the index order is unsupported. */
+static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end,
+        enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier)
+{
+    enum vkd3d_sm4_register_precision precision;
+    enum vkd3d_sm4_register_type register_type;
+    enum vkd3d_sm4_extended_operand_type type;
+    enum vkd3d_sm4_register_modifier m;
+    uint32_t token, order, extended;
+
+    if (*ptr >= end)
+    {
+        WARN("Invalid ptr %p >= end %p.\n", *ptr, end);
+        return false;
+    }
+    token = *(*ptr)++;
+
+    register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT;
+    if (register_type >= ARRAY_SIZE(register_type_table)
+            || register_type_table[register_type] == VKD3DSPR_INVALID)
+    {
+        FIXME("Unhandled register type %#x.\n", register_type);
+        param->type = VKD3DSPR_TEMP;
+    }
+    else
+    {
+        param->type = register_type_table[register_type];
+    }
+    param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT;
+    param->non_uniform = false;
+    param->data_type = data_type;
+
+    *modifier = VKD3DSPSM_NONE;
+    if (token & VKD3D_SM4_EXTENDED_OPERAND)
+    {
+        if (*ptr >= end)
+        {
+            WARN("Invalid ptr %p >= end %p.\n", *ptr, end);
+            return false;
+        }
+        extended = *(*ptr)++;
+
+        if (extended & VKD3D_SM4_EXTENDED_OPERAND)
+        {
+            FIXME("Skipping second-order extended operand.\n");
+            /* Skip one more token, but never step beyond "end". */
+            *ptr += *ptr < end;
+        }
+
+        type = extended & VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK;
+        if (type == VKD3D_SM4_EXTENDED_OPERAND_MODIFIER)
+        {
+            m = (extended & VKD3D_SM4_REGISTER_MODIFIER_MASK) >> VKD3D_SM4_REGISTER_MODIFIER_SHIFT;
+            switch (m)
+            {
+                case VKD3D_SM4_REGISTER_MODIFIER_NEGATE:
+                    *modifier = VKD3DSPSM_NEG;
+                    break;
+
+                case VKD3D_SM4_REGISTER_MODIFIER_ABS:
+                    *modifier = VKD3DSPSM_ABS;
+                    break;
+
+                case VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE:
+                    *modifier = VKD3DSPSM_ABSNEG;
+                    break;
+
+                default:
+                    FIXME("Unhandled register modifier %#x.\n", m);
+                    /* fall-through */
+                case VKD3D_SM4_REGISTER_MODIFIER_NONE:
+                    break;
+            }
+
+            precision = (extended & VKD3D_SM4_REGISTER_PRECISION_MASK) >> VKD3D_SM4_REGISTER_PRECISION_SHIFT;
+            if (precision >= ARRAY_SIZE(register_precision_table)
+                    || register_precision_table[precision] == VKD3D_SHADER_REGISTER_PRECISION_INVALID)
+            {
+                FIXME("Unhandled register precision %#x.\n", precision);
+                param->precision = VKD3D_SHADER_REGISTER_PRECISION_INVALID;
+            }
+            else
+            {
+                param->precision = register_precision_table[precision];
+            }
+
+            if (extended & VKD3D_SM4_REGISTER_NON_UNIFORM_MASK)
+                param->non_uniform = true;
+
+            /* Whatever is left after clearing the known fields is reported. */
+            extended &= ~(VKD3D_SM4_EXTENDED_OPERAND_TYPE_MASK | VKD3D_SM4_REGISTER_MODIFIER_MASK
+                    | VKD3D_SM4_REGISTER_PRECISION_MASK | VKD3D_SM4_REGISTER_NON_UNIFORM_MASK
+                    | VKD3D_SM4_EXTENDED_OPERAND);
+            if (extended)
+                FIXME("Skipping unhandled extended operand bits 0x%08x.\n", extended);
+        }
+        else if (type)
+        {
+            FIXME("Skipping unhandled extended operand token 0x%08x (type %#x).\n", extended, type);
+        }
+    }
+
+    order = (token & VKD3D_SM4_REGISTER_ORDER_MASK) >> VKD3D_SM4_REGISTER_ORDER_SHIFT;
+
+    if (order < 1)
+    {
+        param->idx[0].offset = ~0u;
+        param->idx[0].rel_addr = NULL;
+    }
+    else
+    {
+        DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK0) >> VKD3D_SM4_ADDRESSING_SHIFT0;
+        if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[0])))
+        {
+            ERR("Failed to read register index.\n");
+            return false;
+        }
+    }
+
+    if (order < 2)
+    {
+        param->idx[1].offset = ~0u;
+        param->idx[1].rel_addr = NULL;
+    }
+    else
+    {
+        DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK1) >> VKD3D_SM4_ADDRESSING_SHIFT1;
+        if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[1])))
+        {
+            ERR("Failed to read register index.\n");
+            return false;
+        }
+    }
+
+    if (order < 3)
+    {
+        param->idx[2].offset = ~0u;
+        param->idx[2].rel_addr = NULL;
+    }
+    else
+    {
+        DWORD addressing = (token & VKD3D_SM4_ADDRESSING_MASK2) >> VKD3D_SM4_ADDRESSING_SHIFT2;
+        if (!(shader_sm4_read_reg_idx(priv, ptr, end, addressing, &param->idx[2])))
+        {
+            ERR("Failed to read register index.\n");
+            return false;
+        }
+    }
+
+    if (order > 3)
+    {
+        WARN("Unhandled order %u.\n", order);
+        return false;
+    }
+
+    param->idx_count = order;
+
+    if (register_type == VKD3D_SM4_RT_IMMCONST || register_type == VKD3D_SM4_RT_IMMCONST64)
+    {
+        enum vkd3d_sm4_dimension dimension = (token & VKD3D_SM4_DIMENSION_MASK) >> VKD3D_SM4_DIMENSION_SHIFT;
+        unsigned int dword_count;
+
+        switch (dimension)
+        {
+            case VKD3D_SM4_DIMENSION_SCALAR:
+                param->immconst_type = VKD3D_IMMCONST_SCALAR;
+                /* One dword for a 32-bit scalar, two for a 64-bit one. */
+                dword_count = 1 + (register_type == VKD3D_SM4_RT_IMMCONST64);
+                if (end - *ptr < dword_count)
+                {
+                    WARN("Invalid ptr %p, end %p.\n", *ptr, end);
+                    return false;
+                }
+                memcpy(param->u.immconst_uint, *ptr, dword_count * sizeof(DWORD));
+                *ptr += dword_count;
+                break;
+
+            case VKD3D_SM4_DIMENSION_VEC4:
+                param->immconst_type = VKD3D_IMMCONST_VEC4;
+                if (end - *ptr < VKD3D_VEC4_SIZE)
+                {
+                    WARN("Invalid ptr %p, end %p.\n", *ptr, end);
+                    return false;
+                }
+                memcpy(param->u.immconst_uint, *ptr, VKD3D_VEC4_SIZE * sizeof(DWORD));
+                *ptr += 4;
+                break;
+
+            default:
+                FIXME("Unhandled dimension %#x.\n", dimension);
+                break;
+        }
+    }
+    else if (!shader_is_sm_5_1(priv) && sm4_register_is_descriptor(register_type))
+    {
+        /* SM5.1 places a symbol identifier in idx[0] and moves
+         * other values up one slot. Normalize to SM5.1. */
+        param->idx[2] = param->idx[1];
+        param->idx[1] = param->idx[0];
+        ++param->idx_count;
+    }
+
+    map_register(priv, param);
+
+    return true;
+}
+
+/* Returns true for the register types listed below; for these,
+ * shader_sm4_read_src_param() substitutes an .xxxx swizzle when the operand
+ * carries no swizzle. */
+static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg)
+{
+    switch (reg->type)
+    {
+        case VKD3DSPR_COVERAGE:
+        case VKD3DSPR_DEPTHOUT:
+        case VKD3DSPR_DEPTHOUTGE:
+        case VKD3DSPR_DEPTHOUTLE:
+        case VKD3DSPR_GSINSTID:
+        case VKD3DSPR_LOCALTHREADINDEX:
+        case VKD3DSPR_OUTPOINTID:
+        case VKD3DSPR_PRIMID:
+        case VKD3DSPR_SAMPLEMASK:
+        case VKD3DSPR_OUTSTENCILREF:
+            return true;
+        default:
+            return false;
+    }
+}
+
+/* Builds a vkd3d swizzle from four 2-bit component selectors packed into the
+ * low 8 bits of "s", lowest bits first. */
+static uint32_t swizzle_from_sm4(uint32_t s)
+{
+    return vkd3d_shader_create_swizzle(s & 0x3, (s >> 2) & 0x3, (s >> 4) & 0x3, (s >> 6) & 0x3);
+}
+
+/* Returns true for the register types whose accesses are checked by
+ * shader_sm4_validate_input_output_register(). */
+static bool register_is_input_output(const struct vkd3d_shader_register *reg)
+{
+    switch (reg->type)
+    {
+        case VKD3DSPR_INPUT:
+        case VKD3DSPR_OUTPUT:
+        case VKD3DSPR_COLOROUT:
+        case VKD3DSPR_INCONTROLPOINT:
+        case VKD3DSPR_OUTCONTROLPOINT:
+        case VKD3DSPR_PATCHCONST:
+            return true;
+
+        default:
+            return false;
+    }
+}
+
+/* Returns true for control point registers, and for plain input registers
+ * while parsing the hull shader control point phase or a geometry shader. */
+static bool register_is_control_point_input(const struct vkd3d_shader_register *reg,
+        const struct vkd3d_shader_sm4_parser *priv)
+{
+    return reg->type == VKD3DSPR_INCONTROLPOINT || reg->type == VKD3DSPR_OUTCONTROLPOINT
+            || (reg->type == VKD3DSPR_INPUT && (priv->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE
+            || priv->p.shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY));
+}
+
+/* Returns a bit mask with one bit set for every component index selected by
+ * any of the four swizzle positions. */
+static unsigned int mask_from_swizzle(unsigned int swizzle)
+{
+    return (1u << vkd3d_swizzle_get_component(swizzle, 0))
+            | (1u << vkd3d_swizzle_get_component(swizzle, 1))
+            | (1u << vkd3d_swizzle_get_component(swizzle, 2))
+            | (1u << vkd3d_swizzle_get_component(swizzle, 3));
+}
+
+/* Checks an input/output register access: the register must carry one index
+ * (two for control point inputs), and "mask" must be covered by the mask
+ * recorded for that register index. */
+static bool shader_sm4_validate_input_output_register(struct vkd3d_shader_sm4_parser *priv,
+        const struct vkd3d_shader_register *reg, unsigned int mask)
+{
+    unsigned int idx_count = 1 + register_is_control_point_input(reg, priv);
+    const unsigned int *masks;
+    unsigned
int register_idx;
+
+    if (reg->idx_count != idx_count) /* idx_count is 2 for control point inputs, 1 otherwise. */
+    {
+        vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_INDEX_COUNT,
+                "Invalid index count %u for register type %#x; expected count %u.",
+                reg->idx_count, reg->type, idx_count);
+        return false;
+    }
+
+    switch (reg->type) /* Select the register mask table of the matching signature. */
+    {
+        case VKD3DSPR_INPUT:
+        case VKD3DSPR_INCONTROLPOINT:
+            masks = priv->input_register_masks;
+            break;
+        case VKD3DSPR_OUTPUT:
+            masks = sm4_parser_is_in_fork_or_join_phase(priv) ? priv->patch_constant_register_masks
+                    : priv->output_register_masks;
+            break;
+        case VKD3DSPR_COLOROUT:
+        case VKD3DSPR_OUTCONTROLPOINT:
+            masks = priv->output_register_masks;
+            break;
+        case VKD3DSPR_PATCHCONST:
+            masks = priv->patch_constant_register_masks;
+            break;
+
+        default:
+            vkd3d_unreachable();
+    }
+
+    register_idx = reg->idx[reg->idx_count - 1].offset; /* The last index is the register number. */
+    /* The signature element registers have already been checked against MAX_REG_OUTPUT. */
+    if (register_idx >= MAX_REG_OUTPUT || (masks[register_idx] & mask) != mask)
+    {
+        WARN("Failed to find signature element for register type %#x, index %u and mask %#x.\n",
+                reg->type, register_idx, mask);
+        vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER,
+                "Could not find signature element matching register type %#x, index %u and mask %#x.",
+                reg->type, register_idx, mask);
+        return false;
+    }
+
+    return true;
+}
+/* Read one source operand at *ptr into src_param: the register and modifiers
+ * come from shader_sm4_read_param(), the swizzle from the first operand
+ * token. Input/output registers are additionally validated against the
+ * signature masks. Returns false on failure. */
+static bool shader_sm4_read_src_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr,
+        const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_src_param *src_param)
+{
+    DWORD token;
+
+    if (*ptr >= end)
+    {
+        WARN("Invalid ptr %p >= end %p.\n", *ptr, end);
+        return false;
+    }
+    token = **ptr; /* Saved before shader_sm4_read_param() is handed ptr. */
+
+    if (!shader_sm4_read_param(priv, ptr, end, data_type, &src_param->reg, &src_param->modifiers))
+    {
+        ERR("Failed to read parameter.\n");
+        return false;
+    }
+
+    if (src_param->reg.type == VKD3DSPR_IMMCONST || src_param->reg.type == VKD3DSPR_IMMCONST64)
+    {
+
src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
+    }
+    else
+    {
+        enum vkd3d_sm4_swizzle_type swizzle_type =
+                (token & VKD3D_SM4_SWIZZLE_TYPE_MASK) >> VKD3D_SM4_SWIZZLE_TYPE_SHIFT;
+
+        switch (swizzle_type)
+        {
+            case VKD3D_SM4_SWIZZLE_NONE:
+                if (shader_sm4_is_scalar_register(&src_param->reg))
+                    src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
+                else
+                    src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
+                break;
+
+            case VKD3D_SM4_SWIZZLE_SCALAR:
+                /* Replicate the single selected component into every byte of the swizzle. */
+                src_param->swizzle = (token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT;
+                src_param->swizzle = (src_param->swizzle & 0x3) * 0x01010101;
+                break;
+
+            case VKD3D_SM4_SWIZZLE_VEC4:
+                src_param->swizzle = swizzle_from_sm4((token & VKD3D_SM4_SWIZZLE_MASK) >> VKD3D_SM4_SWIZZLE_SHIFT);
+                break;
+
+            default:
+                FIXME("Unhandled swizzle type %#x.\n", swizzle_type);
+                /* Give the swizzle a defined value; it is read by
+                 * mask_from_swizzle() below and by later consumers. */
+                src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
+                break;
+        }
+    }
+
+    /* The components selected by the swizzle must all be declared in the signature. */
+    if (register_is_input_output(&src_param->reg) && !shader_sm4_validate_input_output_register(priv,
+            &src_param->reg, mask_from_swizzle(src_param->swizzle)))
+        return false;
+
+    return true;
+}
+
+/* Read one destination operand at *ptr into dst_param. The write mask is
+ * taken from the first operand token; source modifiers are rejected.
+ * Returns false on failure. */
+static bool shader_sm4_read_dst_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr,
+        const uint32_t *end, enum vkd3d_data_type data_type, struct vkd3d_shader_dst_param *dst_param)
+{
+    enum vkd3d_shader_src_modifier modifier;
+    DWORD token;
+
+    if (*ptr >= end)
+    {
+        WARN("Invalid ptr %p >= end %p.\n", *ptr, end);
+        return false;
+    }
+    token = **ptr;
+
+    if (!shader_sm4_read_param(priv, ptr, end, data_type, &dst_param->reg, &modifier))
+    {
+        ERR("Failed to read parameter.\n");
+        return false;
+    }
+
+    if (modifier != VKD3DSPSM_NONE)
+    {
+        ERR("Invalid source modifier %#x on destination register.\n", modifier);
+        return false;
+    }
+
+    dst_param->write_mask = (token & VKD3D_SM4_WRITEMASK_MASK) >> VKD3D_SM4_WRITEMASK_SHIFT;
+    if (data_type == VKD3D_DATA_DOUBLE)
+        dst_param->write_mask = vkd3d_write_mask_64_from_32(dst_param->write_mask);
+    /* Scalar registers are declared with no write mask in shader bytecode.
*/
+    if (!dst_param->write_mask && shader_sm4_is_scalar_register(&dst_param->reg))
+        dst_param->write_mask = VKD3DSP_WRITEMASK_0;
+    dst_param->modifiers = 0;
+    dst_param->shift = 0;
+
+    if (register_is_input_output(&dst_param->reg) && !shader_sm4_validate_input_output_register(priv,
+            &dst_param->reg, dst_param->write_mask))
+        return false;
+
+    return true;
+}
+
+/* Decode one extended opcode (instruction modifier) token into ins: an
+ * immediate texel offset, per-component resource data types, or a resource
+ * type with stride. Unknown modifiers are only reported. */
+static void shader_sm4_read_instruction_modifier(DWORD modifier, struct vkd3d_shader_instruction *ins)
+{
+    enum vkd3d_sm4_instruction_modifier modifier_type = modifier & VKD3D_SM4_MODIFIER_MASK;
+
+    switch (modifier_type)
+    {
+        case VKD3D_SM4_MODIFIER_AOFFIMMI:
+        {
+            static const DWORD recognized_bits = VKD3D_SM4_INSTRUCTION_MODIFIER
+                    | VKD3D_SM4_MODIFIER_MASK
+                    | VKD3D_SM4_AOFFIMMI_U_MASK
+                    | VKD3D_SM4_AOFFIMMI_V_MASK
+                    | VKD3D_SM4_AOFFIMMI_W_MASK;
+
+            /* Bit fields are used for sign extension. They are declared
+             * "signed int" explicitly because the signedness of a plain
+             * "int" bit-field is implementation-defined. */
+            struct
+            {
+                signed int u : 4;
+                signed int v : 4;
+                signed int w : 4;
+            } aoffimmi;
+
+            if (modifier & ~recognized_bits)
+                FIXME("Unhandled instruction modifier %#x.\n", modifier);
+
+            aoffimmi.u = (modifier & VKD3D_SM4_AOFFIMMI_U_MASK) >> VKD3D_SM4_AOFFIMMI_U_SHIFT;
+            aoffimmi.v = (modifier & VKD3D_SM4_AOFFIMMI_V_MASK) >> VKD3D_SM4_AOFFIMMI_V_SHIFT;
+            aoffimmi.w = (modifier & VKD3D_SM4_AOFFIMMI_W_MASK) >> VKD3D_SM4_AOFFIMMI_W_SHIFT;
+            ins->texel_offset.u = aoffimmi.u;
+            ins->texel_offset.v = aoffimmi.v;
+            ins->texel_offset.w = aoffimmi.w;
+            break;
+        }
+
+        case VKD3D_SM5_MODIFIER_DATA_TYPE:
+        {
+            DWORD components = (modifier & VKD3D_SM5_MODIFIER_DATA_TYPE_MASK) >> VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT;
+            unsigned int i;
+
+            for (i = 0; i < VKD3D_VEC4_SIZE; i++)
+            {
+                enum vkd3d_sm4_data_type data_type = VKD3D_SM4_TYPE_COMPONENT(components, i);
+
+                /* Zero and out-of-table values fall back to float. */
+                if (!data_type || (data_type >= ARRAY_SIZE(data_type_table)))
+                {
+                    FIXME("Unhandled data type %#x.\n", data_type);
+                    ins->resource_data_type[i] = VKD3D_DATA_FLOAT;
+                }
+                else
+                {
+                    ins->resource_data_type[i] = data_type_table[data_type];
+                }
+            }
+            break;
+        }
+
+        case
VKD3D_SM5_MODIFIER_RESOURCE_TYPE:
+        {
+            enum vkd3d_sm4_resource_type resource_type
+                    = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT;
+
+            if (resource_type == VKD3D_SM4_RESOURCE_RAW_BUFFER)
+                ins->raw = true;
+            else if (resource_type == VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER)
+                ins->structured = true;
+
+            if (resource_type < ARRAY_SIZE(resource_type_table)) /* Guard the table lookup. */
+                ins->resource_type = resource_type_table[resource_type];
+            else
+            {
+                FIXME("Unhandled resource type %#x.\n", resource_type);
+                ins->resource_type = VKD3D_SHADER_RESOURCE_NONE;
+            }
+
+            ins->resource_stride
+                    = (modifier & VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK) >> VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT;
+            break;
+        }
+
+        default:
+            FIXME("Unhandled instruction modifier %#x.\n", modifier);
+    }
+}
+/* Decode the instruction at sm4->ptr into ins and advance sm4->ptr past it.
+ * On any failure ins->handler_idx is set to VKD3DSIH_INVALID; for a bad
+ * length or truncated stream sm4->ptr is moved to sm4->end ("fail" label). */
+static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_instruction *ins)
+{
+    const struct vkd3d_sm4_opcode_info *opcode_info;
+    uint32_t opcode_token, opcode, previous_token;
+    struct vkd3d_shader_dst_param *dst_params;
+    struct vkd3d_shader_src_param *src_params;
+    const uint32_t **ptr = &sm4->ptr;
+    unsigned int i, len;
+    size_t remaining;
+    const uint32_t *p;
+    DWORD precise;
+
+    if (*ptr >= sm4->end)
+    {
+        WARN("End of byte-code, failed to read opcode.\n");
+        goto fail;
+    }
+    remaining = sm4->end - *ptr; /* Tokens left, including the opcode token. */
+
+    ++sm4->p.location.line;
+
+    opcode_token = *(*ptr)++;
+    opcode = opcode_token & VKD3D_SM4_OPCODE_MASK;
+
+    len = ((opcode_token & VKD3D_SM4_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT);
+    if (!len) /* A zero length field means the length is stored in the next token. */
+    {
+        if (remaining < 2)
+        {
+            WARN("End of byte-code, failed to read length token.\n");
+            goto fail;
+        }
+        len = **ptr;
+    }
+    if (!len || remaining < len)
+    {
+        WARN("Read invalid length %u (remaining %zu).\n", len, remaining);
+        goto fail;
+    }
+    --len; /* The opcode token has already been consumed. */
+
+    if (!(opcode_info = get_opcode_info(opcode)))
+    {
+        FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token);
+        ins->handler_idx
= VKD3DSIH_INVALID;
+        *ptr += len; /* Skip the rest of the unrecognised instruction. */
+        return;
+    }
+
+    ins->handler_idx = opcode_info->handler_idx;
+    /* Track the current hull shader phase for later register validation. */
+    if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE
+            || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE)
+        sm4->phase = ins->handler_idx;
+    sm4->has_control_point_phase |= ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE;
+    ins->flags = 0;
+    ins->coissue = false;
+    ins->raw = false;
+    ins->structured = false;
+    ins->predicate = NULL;
+    /* Operand counts are the lengths of the opcode's dst/src info strings. */
+    ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT);
+    ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT);
+    ins->src = src_params = shader_parser_get_src_params(&sm4->p, ins->src_count);
+    if (!src_params && ins->src_count)
+    {
+        ERR("Failed to allocate src parameters.\n");
+        vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory.");
+        ins->handler_idx = VKD3DSIH_INVALID;
+        return;
+    }
+    ins->resource_type = VKD3D_SHADER_RESOURCE_NONE;
+    ins->resource_stride = 0;
+    ins->resource_data_type[0] = VKD3D_DATA_FLOAT;
+    ins->resource_data_type[1] = VKD3D_DATA_FLOAT;
+    ins->resource_data_type[2] = VKD3D_DATA_FLOAT;
+    ins->resource_data_type[3] = VKD3D_DATA_FLOAT;
+    memset(&ins->texel_offset, 0, sizeof(ins->texel_offset));
+
+    p = *ptr; /* p walks this instruction's operands; *ptr already points at the next instruction. */
+    *ptr += len;
+
+    if (opcode_info->read_opcode_func)
+    {
+        ins->dst = NULL;
+        ins->dst_count = 0;
+        opcode_info->read_opcode_func(ins, opcode, opcode_token, p, len, sm4);
+    }
+    else
+    {
+        enum vkd3d_shader_dst_modifier instruction_dst_modifier = VKD3DSPDM_NONE;
+
+        previous_token = opcode_token;
+        /* Each token with the modifier bit set is followed by another modifier token. */
+        while (previous_token & VKD3D_SM4_INSTRUCTION_MODIFIER && p != *ptr)
+            shader_sm4_read_instruction_modifier(previous_token = *p++, ins);
+
+        ins->flags = (opcode_token & VKD3D_SM4_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT;
+        if (ins->flags & VKD3D_SM4_INSTRUCTION_FLAG_SATURATE) /* Saturate becomes a destination modifier. */
+        {
+            ins->flags &= ~VKD3D_SM4_INSTRUCTION_FLAG_SATURATE;
+            instruction_dst_modifier = VKD3DSPDM_SATURATE;
+        }
+        precise = (opcode_token & VKD3D_SM5_PRECISE_MASK) >> VKD3D_SM5_PRECISE_SHIFT;
+        ins->flags |= precise << VKD3DSI_PRECISE_SHIFT;
+
+        ins->dst = dst_params = shader_parser_get_dst_params(&sm4->p, ins->dst_count);
+        if (!dst_params && ins->dst_count)
+        {
+            ERR("Failed to allocate dst parameters.\n");
+            vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory.");
+            ins->handler_idx = VKD3DSIH_INVALID;
+            return;
+        }
+        /* Destination operands come first, followed by the source operands. */
+        for (i = 0; i < ins->dst_count; ++i)
+        {
+            if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]),
+                    &dst_params[i])))
+            {
+                ins->handler_idx = VKD3DSIH_INVALID;
+                return;
+            }
+            dst_params[i].modifiers |= instruction_dst_modifier;
+        }
+
+        for (i = 0; i < ins->src_count; ++i)
+        {
+            if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]),
+                    &src_params[i])))
+            {
+                ins->handler_idx = VKD3DSIH_INVALID;
+                return;
+            }
+        }
+    }
+
+    return;
+
+fail:
+    *ptr = sm4->end;
+    ins->handler_idx = VKD3DSIH_INVALID;
+    return;
+}
+
+/* Parser callbacks for the SM4/SM5 token format. */
+static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops =
+{
+    .parser_destroy = shader_sm4_destroy,
+};
+
+/* Initialise sm4 from a token stream: check the size, read the version and
+ * token count tokens, set up the generic parser state and build the output
+ * register map from output_signature. Returns false on failure. */
+static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code,
+        size_t byte_code_size, const char *source_name, const struct shader_signature *output_signature,
+        struct vkd3d_shader_message_context *message_context)
+{
+    struct vkd3d_shader_version version;
+    uint32_t version_token, token_count;
+    unsigned int i;
+
+    if (byte_code_size / sizeof(*byte_code) < 2)
+    {
+        /* %lu requires an unsigned long argument. */
+        WARN("Invalid byte code size %lu.\n", (unsigned long)byte_code_size);
+        return false;
+    }
+
+    version_token = byte_code[0];
+    TRACE("Version: 0x%08x.\n", version_token);
+    token_count = byte_code[1];
+    TRACE("Token count: %u.\n", token_count);
+
+    /* token_count includes the two header tokens and must fit in the buffer. */
+    if (token_count < 2 || byte_code_size / sizeof(*byte_code) < token_count)
+    {
+        WARN("Invalid token count %u.\n", token_count);
+        return false;
+    }
+
+    sm4->start = &byte_code[2];
+    sm4->end =
&byte_code[token_count];
+
+    /* The shader type is stored in the upper 16 bits of the version token. */
+    switch (version_token >> 16)
+    {
+        case VKD3D_SM4_PS:
+            version.type = VKD3D_SHADER_TYPE_PIXEL;
+            break;
+
+        case VKD3D_SM4_VS:
+            version.type = VKD3D_SHADER_TYPE_VERTEX;
+            break;
+
+        case VKD3D_SM4_GS:
+            version.type = VKD3D_SHADER_TYPE_GEOMETRY;
+            break;
+
+        case VKD3D_SM5_HS:
+            version.type = VKD3D_SHADER_TYPE_HULL;
+            break;
+
+        case VKD3D_SM5_DS:
+            version.type = VKD3D_SHADER_TYPE_DOMAIN;
+            break;
+
+        case VKD3D_SM5_CS:
+            version.type = VKD3D_SHADER_TYPE_COMPUTE;
+            break;
+
+        default:
+            FIXME("Unrecognised shader type %#x.\n", version_token >> 16);
+            /* Bail out: version.type would otherwise be read uninitialised
+             * below. Nothing has been allocated by this function yet. */
+            return false;
+    }
+    version.major = VKD3D_SM4_VERSION_MAJOR(version_token);
+    version.minor = VKD3D_SM4_VERSION_MINOR(version_token);
+
+    /* Estimate instruction count to avoid reallocation in most shaders. */
+    if (!vkd3d_shader_parser_init(&sm4->p, message_context, source_name, &version, &shader_sm4_parser_ops,
+            token_count / 7u + 20))
+        return false;
+    sm4->ptr = sm4->start;
+
+    /* Map output register indices to semantic indices; 0xff bytes mark unmapped entries. */
+    memset(sm4->output_map, 0xff, sizeof(sm4->output_map));
+    for (i = 0; i < output_signature->element_count; ++i)
+    {
+        struct signature_element *e = &output_signature->elements[i];
+
+        /* For pixel shaders only SV_Target outputs are mapped. */
+        if (version.type == VKD3D_SHADER_TYPE_PIXEL
+                && ascii_strcasecmp(e->semantic_name, "SV_Target"))
+            continue;
+        if (e->register_index >= ARRAY_SIZE(sm4->output_map))
+        {
+            WARN("Invalid output index %u.\n", e->register_index);
+            continue;
+        }
+
+        sm4->output_map[e->register_index] = e->semantic_index;
+    }
+
+    return true;
+}
+
+/* Check that every element of a signature fits within MAX_REG_OUTPUT
+ * registers and accumulate its component mask into masks[], indexed by
+ * register. name is only used in diagnostics. Returns false if an element
+ * is out of range. */
+static bool shader_sm4_parser_validate_signature(struct vkd3d_shader_sm4_parser *sm4,
+        const struct shader_signature *signature, unsigned int *masks, const char *name)
+{
+    unsigned int i, register_idx, register_count, mask;
+
+    for (i = 0; i < signature->element_count; ++i)
+    {
+        register_idx = signature->elements[i].register_index;
+        register_count = signature->elements[i].register_count;
+        if (register_idx != ~0u && (register_idx >= MAX_REG_OUTPUT || MAX_REG_OUTPUT - register_idx < register_count))
+        {
+            WARN("%s
signature element %u unhandled register index %u, count %u.\n",
+                    name, i, register_idx, register_count);
+            vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS,
+                    "%s signature element %u register index %u, count %u exceeds maximum index of %u.", name,
+                    i, register_idx, register_count, MAX_REG_OUTPUT - 1);
+            return false;
+        }
+
+        if (!vkd3d_bitmask_is_contiguous(mask = signature->elements[i].mask)) /* Reported as a warning only. */
+        {
+            WARN("%s signature element %u mask %#x is not contiguous.\n", name, i, mask);
+            vkd3d_shader_parser_warning(&sm4->p, VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS,
+                    "%s signature element %u mask %#x is not contiguous.", name, i, mask);
+        }
+
+        if (register_idx != ~0u) /* ~0u marks an element without a register index. */
+            masks[register_idx] |= mask;
+    }
+
+    return true;
+}
+/* qsort() comparator: orders sm4_index_range entries by their raw bytes. */
+static int index_range_compare(const void *a, const void *b)
+{
+    return memcmp(a, b, sizeof(struct sm4_index_range));
+}
+/* Report an error unless the input and output index range declarations are
+ * identical as sets. Nothing is checked when either list is empty. Both
+ * arrays are sorted in place as a side effect. */
+static void shader_sm4_validate_default_phase_index_ranges(struct vkd3d_shader_sm4_parser *sm4)
+{
+    if (!sm4->input_index_ranges.count || !sm4->output_index_ranges.count)
+        return;
+
+    if (sm4->input_index_ranges.count == sm4->output_index_ranges.count)
+    {
+        qsort(sm4->input_index_ranges.ranges, sm4->input_index_ranges.count, sizeof(sm4->input_index_ranges.ranges[0]),
+                index_range_compare);
+        qsort(sm4->output_index_ranges.ranges, sm4->output_index_ranges.count, sizeof(sm4->output_index_ranges.ranges[0]),
+                index_range_compare);
+        if (!memcmp(sm4->input_index_ranges.ranges, sm4->output_index_ranges.ranges,
+                sm4->input_index_ranges.count * sizeof(sm4->input_index_ranges.ranges[0])))
+            return;
+    }
+
+    /* This is very unlikely to occur and would complicate the default control point phase implementation.
*/
+    WARN("Default phase index ranges are not identical.\n");
+    vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL,
+            "Default control point phase input and output index range declarations are not identical.");
+    return;
+}
+/* Create an SM4/SM5 parser for the DXBC blob in compile_info and decode all
+ * of its instructions. On success (or when only parser errors were recorded)
+ * *parser receives the new parser; on the early failure paths everything
+ * allocated here is released and a negative vkd3d error code is returned. */
+int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info,
+        struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser)
+{
+    struct vkd3d_shader_instruction_array *instructions;
+    struct vkd3d_shader_desc *shader_desc;
+    struct vkd3d_shader_instruction *ins;
+    struct vkd3d_shader_sm4_parser *sm4;
+    int ret;
+
+    if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4))))
+    {
+        ERR("Failed to allocate parser.\n");
+        return VKD3D_ERROR_OUT_OF_MEMORY;
+    }
+
+    shader_desc = &sm4->p.shader_desc;
+    if ((ret = shader_extract_from_dxbc(&compile_info->source,
+            message_context, compile_info->source_name, shader_desc)) < 0)
+    {
+        WARN("Failed to extract shader, vkd3d result %d.\n", ret);
+        vkd3d_free(sm4);
+        return ret;
+    }
+
+    if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size,
+            compile_info->source_name, &shader_desc->output_signature, message_context))
+    {
+        WARN("Failed to initialise shader parser.\n");
+        free_shader_desc(shader_desc);
+        vkd3d_free(sm4);
+        return VKD3D_ERROR_INVALID_ARGUMENT;
+    }
+
+    /* From here on the parser is torn down with shader_sm4_destroy(). */
+    if (!shader_sm4_parser_validate_signature(sm4, &shader_desc->input_signature,
+            sm4->input_register_masks, "Input")
+            || !shader_sm4_parser_validate_signature(sm4, &shader_desc->output_signature,
+            sm4->output_register_masks, "Output")
+            || !shader_sm4_parser_validate_signature(sm4, &shader_desc->patch_constant_signature,
+            sm4->patch_constant_register_masks, "Patch constant"))
+    {
+        shader_sm4_destroy(&sm4->p);
+        return VKD3D_ERROR_INVALID_SHADER;
+    }
+
+    instructions = &sm4->p.instructions;
+    while (sm4->ptr != sm4->end) /* Decode one instruction per iteration until the stream is exhausted. */
+    {
+        if (!shader_instruction_array_reserve(instructions, instructions->count + 1))
+        {
+            ERR("Failed to allocate
instructions.\n");
+            vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory.");
+            shader_sm4_destroy(&sm4->p);
+            return VKD3D_ERROR_OUT_OF_MEMORY;
+        }
+        ins = &instructions->elements[instructions->count];
+        shader_sm4_read_instruction(sm4, ins);
+
+        if (ins->handler_idx == VKD3DSIH_INVALID)
+        {
+            WARN("Encountered unrecognized or invalid instruction.\n");
+            shader_sm4_destroy(&sm4->p);
+            /* NOTE(review): this path is also reached for malformed or unrecognised
+             * instructions, not only allocation failures; VKD3D_ERROR_INVALID_SHADER
+             * may be the better code. Confirm against callers before changing. */
+            return VKD3D_ERROR_OUT_OF_MEMORY;
+        }
+        ++instructions->count;
+    }
+    /* Hull shaders without an explicit control point phase get their index ranges cross-checked. */
+    if (sm4->p.shader_version.type == VKD3D_SHADER_TYPE_HULL && !sm4->has_control_point_phase && !sm4->p.failed)
+        shader_sm4_validate_default_phase_index_ranges(sm4);
+
+    *parser = &sm4->p;
+
+    return sm4->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK;
+}
+
+static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block);
+/* True when the base type of "type" is bool, int or uint. */
+static bool type_is_integer(const struct hlsl_type *type)
+{
+    switch (type->base_type)
+    {
+        case HLSL_TYPE_BOOL:
+        case HLSL_TYPE_INT:
+        case HLSL_TYPE_UINT:
+            return true;
+
+        default:
+            return false;
+    }
+}
+/* Look up a dedicated SM4 register for a semantic. A row matches on the
+ * case-insensitive semantic name, the input/output direction and the shader
+ * type of ctx->profile. On a match *type and *has_idx are filled in, as is
+ * *swizzle_type when that pointer is non-NULL, and true is returned;
+ * otherwise false is returned and the outputs are left untouched. */
+bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
+        bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx)
+{
+    unsigned int i;
+
+    static const struct
+    {
+        const char *semantic;
+        bool output;
+        enum vkd3d_shader_type shader_type;
+        enum vkd3d_sm4_swizzle_type swizzle_type;
+        enum vkd3d_sm4_register_type type;
+        bool has_idx;
+    }
+    register_table[] =
+    {
+        {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false},
+        {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false},
+        {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false},
+
+        {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false},
+
+        /*
Put sv_target in this table, instead of letting it fall through to
+         * default varying allocation, so that the register index matches the
+         * usage index. */
+        {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true},
+        {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false},
+        {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false},
+        {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true},
+    };
+
+    for (i = 0; i < ARRAY_SIZE(register_table); ++i)
+    {
+        if (!ascii_strcasecmp(semantic->name, register_table[i].semantic)
+                && output == register_table[i].output
+                && ctx->profile->type == register_table[i].shader_type)
+        {
+            *type = register_table[i].type;
+            if (swizzle_type)
+                *swizzle_type = register_table[i].swizzle_type;
+            *has_idx = register_table[i].has_idx;
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/* Map a semantic to the D3D_NAME system value written to signatures. A row
+ * matches on the case-insensitive name, the input/output direction and the
+ * shader type of ctx->profile. Rows carrying ~0u mark semantics that callers
+ * skip when writing signatures. */
+bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
+        bool output, D3D_NAME *usage)
+{
+    unsigned int i;
+
+    static const struct
+    {
+        const char *name;
+        bool output;
+        enum vkd3d_shader_type shader_type;
+        /* Holds D3D_NAME_* values and is copied to *usage, so it uses the
+         * same type as the output parameter rather than D3DDECLUSAGE. */
+        D3D_NAME usage;
+    }
+    semantics[] =
+    {
+        {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u},
+        {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u},
+        {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u},
+
+        {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION},
+        {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION},
+        {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID},
+
+        {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION},
+        {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION},
+        {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID},
+
+        {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION},
+        {"sv_position", false,
VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION},
+        {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_IS_FRONT_FACE},
+
+        {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET},
+        {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH},
+        {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET},
+        {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH},
+
+        {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED},
+        {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID},
+
+        {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION},
+        {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION},
+    };
+    /* The match below also requires an "sv_" prefix on the semantic name, so
+     * the rows without that prefix ("position", "color", "depth") can never
+     * be selected here. NOTE(review): presumably intentional; confirm. */
+    for (i = 0; i < ARRAY_SIZE(semantics); ++i)
+    {
+        if (!ascii_strcasecmp(semantic->name, semantics[i].name)
+                && output == semantics[i].output
+                && ctx->profile->type == semantics[i].shader_type
+                && !ascii_strncasecmp(semantic->name, "sv_", 3))
+        {
+            *usage = semantics[i].usage;
+            return true;
+        }
+    }
+
+    if (!ascii_strncasecmp(semantic->name, "sv_", 3)) /* An "sv_" semantic with no matching row is rejected. */
+        return false;
+
+    *usage = D3D_NAME_UNDEFINED; /* Any other semantic name is accepted as an ordinary one. */
+    return true;
+}
+/* Add "buffer" to the DXBC writer as a section with the given tag, using the
+ * aligned size returned by bytecode_align(). */
+static void add_section(struct dxbc_writer *dxbc, uint32_t tag, struct vkd3d_bytecode_buffer *buffer)
+{
+    /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN
+     * sections to be aligned. Without this, the sections themselves will be
+     * aligned, but their reported sizes won't.
*/
+    size_t size = bytecode_align(buffer);
+
+    dxbc_writer_add_section(dxbc, tag, buffer->data, size);
+}
+/* Write the input (ISGN) or output (OSGN) signature section. The first pass
+ * over ctx->extern_vars emits one six-dword element per semantic variable
+ * with a zero name offset; a second pass appends the semantic name strings
+ * and patches the name offsets and the element count. */
+static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output)
+{
+    struct vkd3d_bytecode_buffer buffer = {0};
+    struct vkd3d_string_buffer *string;
+    const struct hlsl_ir_var *var;
+    size_t count_position;
+    unsigned int i;
+    bool ret;
+
+    count_position = put_u32(&buffer, 0); /* Placeholder; patched with the element count at the end. */
+    put_u32(&buffer, 8); /* unknown */
+
+    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+    {
+        unsigned int width = (1u << var->data_type->dimx) - 1, use_mask;
+        enum vkd3d_sm4_register_type type;
+        uint32_t usage_idx, reg_idx;
+        D3D_NAME usage;
+        bool has_idx;
+
+        if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic))
+            continue;
+
+        ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage);
+        assert(ret);
+        if (usage == ~0u) /* Semantics marked ~0u get no signature entry. */
+            continue;
+        usage_idx = var->semantic.index;
+
+        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx))
+        {
+            reg_idx = has_idx ? var->semantic.index : ~0u;
+        }
+        else
+        {
+            assert(var->regs[HLSL_REGSET_NUMERIC].allocated);
+            type = VKD3D_SM4_RT_INPUT;
+            reg_idx = var->regs[HLSL_REGSET_NUMERIC].id;
+        }
+
+        use_mask = width; /* FIXME: accurately report use mask */
+        if (output)
+            use_mask = 0xf ^ use_mask;
+
+        /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE).
*/
+        if (usage >= 64)
+            usage = 0;
+
+        put_u32(&buffer, 0); /* name */
+        put_u32(&buffer, usage_idx);
+        put_u32(&buffer, usage);
+        switch (var->data_type->base_type)
+        {
+            case HLSL_TYPE_FLOAT:
+            case HLSL_TYPE_HALF:
+                put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32);
+                break;
+
+            case HLSL_TYPE_INT:
+                put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32);
+                break;
+
+            case HLSL_TYPE_BOOL:
+            case HLSL_TYPE_UINT:
+                put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32);
+                break;
+
+            default: /* Report the error, but still emit a component type to keep the element layout. */
+                if ((string = hlsl_type_to_string(ctx, var->data_type)))
+                    hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
+                            "Invalid data type %s for semantic variable %s.", string->buffer, var->name);
+                hlsl_release_string_buffer(ctx, string);
+                put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN);
+        }
+        put_u32(&buffer, reg_idx);
+        put_u32(&buffer, vkd3d_make_u16(width, use_mask));
+    }
+
+    i = 0;
+    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
+    {
+        const char *semantic = var->semantic.name;
+        size_t string_offset;
+        D3D_NAME usage;
+
+        /* Same filter as the first pass, so that element i lines up with its name. */
+        if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic))
+            continue;
+
+        hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage);
+        if (usage == ~0u)
+            continue;
+
+        if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color"))
+            string_offset = put_string(&buffer, "SV_Target");
+        else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth"))
+            string_offset = put_string(&buffer, "SV_Depth");
+        else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position"))
+            string_offset = put_string(&buffer, "SV_Position");
+        else
+            string_offset = put_string(&buffer, semantic);
+        /* Patch the name field: two header dwords, then six dwords per element. */
+        set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset);
+    }
+
+    set_u32(&buffer, count_position, i);
+
+    add_section(dxbc, output ?
TAG_OSGN : TAG_ISGN, &buffer);
+}
+/* Map an HLSL type class to its D3D_SHADER_VARIABLE_CLASS. Arrays map to the
+ * class of their element type; matrices must have a majority modifier set. */
+static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type)
+{
+    switch (type->class)
+    {
+        case HLSL_CLASS_ARRAY:
+            return sm4_class(type->e.array.type);
+        case HLSL_CLASS_MATRIX:
+            assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
+            if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
+                return D3D_SVC_MATRIX_COLUMNS;
+            else
+                return D3D_SVC_MATRIX_ROWS;
+        case HLSL_CLASS_OBJECT:
+            return D3D_SVC_OBJECT;
+        case HLSL_CLASS_SCALAR:
+            return D3D_SVC_SCALAR;
+        case HLSL_CLASS_STRUCT:
+            return D3D_SVC_STRUCT;
+        case HLSL_CLASS_VECTOR:
+            return D3D_SVC_VECTOR;
+        default:
+            ERR("Invalid class %#x.\n", type->class);
+            vkd3d_unreachable();
+    }
+}
+/* Map an HLSL base type to its D3D_SHADER_VARIABLE_TYPE; sampler and texture
+ * types are further distinguished by their sampler dimension. */
+static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type)
+{
+    switch (type->base_type)
+    {
+        case HLSL_TYPE_BOOL:
+            return D3D_SVT_BOOL;
+        case HLSL_TYPE_DOUBLE:
+            return D3D_SVT_DOUBLE;
+        case HLSL_TYPE_FLOAT:
+        case HLSL_TYPE_HALF:
+            return D3D_SVT_FLOAT;
+        case HLSL_TYPE_INT:
+            return D3D_SVT_INT;
+        case HLSL_TYPE_PIXELSHADER:
+            return D3D_SVT_PIXELSHADER;
+        case HLSL_TYPE_SAMPLER:
+            switch (type->sampler_dim)
+            {
+                case HLSL_SAMPLER_DIM_1D:
+                    return D3D_SVT_SAMPLER1D;
+                case HLSL_SAMPLER_DIM_2D:
+                    return D3D_SVT_SAMPLER2D;
+                case HLSL_SAMPLER_DIM_3D:
+                    return D3D_SVT_SAMPLER3D;
+                case HLSL_SAMPLER_DIM_CUBE:
+                    return D3D_SVT_SAMPLERCUBE;
+                case HLSL_SAMPLER_DIM_GENERIC:
+                    return D3D_SVT_SAMPLER;
+                default:
+                    vkd3d_unreachable();
+            }
+            break;
+        case HLSL_TYPE_STRING:
+            return D3D_SVT_STRING;
+        case HLSL_TYPE_TEXTURE:
+            switch (type->sampler_dim)
+            {
+                case HLSL_SAMPLER_DIM_1D:
+                    return D3D_SVT_TEXTURE1D;
+                case HLSL_SAMPLER_DIM_2D:
+                    return D3D_SVT_TEXTURE2D;
+                case HLSL_SAMPLER_DIM_2DMS:
+                    return D3D_SVT_TEXTURE2DMS;
+                case HLSL_SAMPLER_DIM_3D:
+                    return D3D_SVT_TEXTURE3D;
+                case HLSL_SAMPLER_DIM_CUBE:
+                    return D3D_SVT_TEXTURECUBE;
+                case HLSL_SAMPLER_DIM_GENERIC:
+                    return D3D_SVT_TEXTURE;
+                default:
+                    vkd3d_unreachable();
+            }
+            break;
+        case
HLSL_TYPE_UINT:
+            return D3D_SVT_UINT;
+        case HLSL_TYPE_VERTEXSHADER:
+            return D3D_SVT_VERTEXSHADER;
+        case HLSL_TYPE_VOID:
+            return D3D_SVT_VOID;
+        default:
+            vkd3d_unreachable();
+    }
+}
+/* Emit the description of "type" into buffer and record its position in
+ * type->bytecode_offset; a type whose offset is already non-zero is not
+ * written again. For structs (or arrays of structs) the field names and
+ * field types are written first, then the aligned field table, and finally
+ * the type record itself. Profiles with major version 5 or above get extra
+ * trailing dwords, the last being the offset of the type name. */
+static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type)
+{
+    const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type);
+    const char *name = array_type->name ? array_type->name : "";
+    const struct hlsl_profile_info *profile = ctx->profile;
+    unsigned int field_count = 0, array_size = 0;
+    size_t fields_offset = 0, name_offset = 0;
+    size_t i;
+
+    if (type->bytecode_offset)
+        return;
+
+    if (profile->major_version >= 5)
+        name_offset = put_string(buffer, name);
+
+    if (type->class == HLSL_CLASS_ARRAY)
+        array_size = hlsl_get_multiarray_size(type);
+
+    if (array_type->class == HLSL_CLASS_STRUCT)
+    {
+        field_count = array_type->e.record.field_count;
+
+        for (i = 0; i < field_count; ++i) /* First pass: names and (recursively) field types. */
+        {
+            struct hlsl_struct_field *field = &array_type->e.record.fields[i];
+
+            field->name_bytecode_offset = put_string(buffer, field->name);
+            write_sm4_type(ctx, buffer, field->type);
+        }
+
+        fields_offset = bytecode_align(buffer);
+
+        for (i = 0; i < field_count; ++i) /* Second pass: the field table referring to those offsets. */
+        {
+            struct hlsl_struct_field *field = &array_type->e.record.fields[i];
+
+            put_u32(buffer, field->name_bytecode_offset);
+            put_u32(buffer, field->type->bytecode_offset);
+            put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]);
+        }
+    }
+
+    type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type)));
+    put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx));
+    put_u32(buffer, vkd3d_make_u32(array_size, field_count));
+    put_u32(buffer, fields_offset);
+
+    if (profile->major_version >= 5)
+    {
+        put_u32(buffer, 0); /* FIXME: unknown */
+        put_u32(buffer, 0); /* FIXME: unknown */
+        put_u32(buffer, 0); /* FIXME: unknown */
+        put_u32(buffer, 0); /* FIXME: unknown */
+        put_u32(buffer, name_offset);
+    }
+}
+
+static
D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type)
+{
+    /* Arrays are described by their element type. */
+    if (type->class == HLSL_CLASS_ARRAY)
+        return sm4_resource_type(type->e.array.type);
+
+    switch (type->base_type)
+    {
+        case HLSL_TYPE_SAMPLER:
+            return D3D_SIT_SAMPLER;
+        case HLSL_TYPE_TEXTURE:
+            return D3D_SIT_TEXTURE;
+        case HLSL_TYPE_UAV:
+            return D3D_SIT_UAV_RWTYPED;
+        default:
+            vkd3d_unreachable();
+    }
+}
+
+/* Map the base type of a resource's format to its D3D_RESOURCE_RETURN_TYPE.
+ * Arrays are described by their element type. */
+static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type)
+{
+    if (type->class == HLSL_CLASS_ARRAY)
+        return sm4_resource_format(type->e.array.type);
+
+    switch (type->e.resource_format->base_type)
+    {
+        case HLSL_TYPE_DOUBLE:
+            return D3D_RETURN_TYPE_DOUBLE;
+
+        case HLSL_TYPE_FLOAT:
+        case HLSL_TYPE_HALF:
+            return D3D_RETURN_TYPE_FLOAT;
+
+        case HLSL_TYPE_INT:
+            return D3D_RETURN_TYPE_SINT;
+
+        case HLSL_TYPE_BOOL:
+        case HLSL_TYPE_UINT:
+            return D3D_RETURN_TYPE_UINT;
+
+        default:
+            vkd3d_unreachable();
+    }
+}
+
+/* Map a resource's sampler dimension to the D3D_SRV_DIMENSION value.
+ * Arrays are described by their element type. */
+static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type)
+{
+    if (type->class == HLSL_CLASS_ARRAY)
+        return sm4_rdef_resource_dimension(type->e.array.type);
+
+    switch (type->sampler_dim)
+    {
+        case HLSL_SAMPLER_DIM_1D:
+            return D3D_SRV_DIMENSION_TEXTURE1D;
+        case HLSL_SAMPLER_DIM_2D:
+            return D3D_SRV_DIMENSION_TEXTURE2D;
+        case HLSL_SAMPLER_DIM_3D:
+            return D3D_SRV_DIMENSION_TEXTURE3D;
+        case HLSL_SAMPLER_DIM_CUBE:
+            return D3D_SRV_DIMENSION_TEXTURECUBE;
+        case HLSL_SAMPLER_DIM_1DARRAY:
+            return D3D_SRV_DIMENSION_TEXTURE1DARRAY;
+        case HLSL_SAMPLER_DIM_2DARRAY:
+            return D3D_SRV_DIMENSION_TEXTURE2DARRAY;
+        case HLSL_SAMPLER_DIM_2DMS:
+            return D3D_SRV_DIMENSION_TEXTURE2DMS;
+        case HLSL_SAMPLER_DIM_2DMSARRAY:
+            return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY;
+        case HLSL_SAMPLER_DIM_CUBEARRAY:
+            return D3D_SRV_DIMENSION_TEXTURECUBEARRAY;
+        case HLSL_SAMPLER_DIM_BUFFER:
+        case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
+            return D3D_SRV_DIMENSION_BUFFER;
+        default:
+            vkd3d_unreachable();
+    }
+}
+
+static int
sm4_compare_extern_resources(const void *a, const void *b) +{ + const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; + const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; + enum hlsl_regset aa_regset, bb_regset; + + aa_regset = hlsl_type_get_regset(aa->data_type); + bb_regset = hlsl_type_get_regset(bb->data_type); + + if (aa_regset != bb_regset) + return aa_regset - bb_regset; + + return aa->regs[aa_regset].id - bb->regs[bb_regset].id; +} + +static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) +{ + const struct hlsl_ir_var **extern_resources = NULL; + const struct hlsl_ir_var *var; + enum hlsl_regset regset; + size_t capacity = 0; + + *count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!hlsl_type_is_resource(var->data_type)) + continue; + regset = hlsl_type_get_regset(var->data_type); + if (!var->regs[regset].allocated) + continue; + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + *count = 0; + return NULL; + } + + extern_resources[*count] = var; + ++*count; + } + + qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); + return extern_resources; +} + +static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +{ + unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; + size_t cbuffers_offset, resources_offset, creator_offset, string_offset; + size_t cbuffer_position, resource_position, creator_position; + const struct hlsl_profile_info *profile = ctx->profile; + const struct hlsl_ir_var **extern_resources; + struct vkd3d_bytecode_buffer buffer = {0}; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + + static const uint16_t target_types[] = + { + 0xffff, /* PIXEL */ + 0xfffe, /* VERTEX */ + 0x4753, /* GEOMETRY */ + 0x4853, /* HULL */ + 0x4453, /* DOMAIN */ + 0x4353, /* 
COMPUTE */ + }; + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + + resource_count += extern_resources_count; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) + { + ++cbuffer_count; + ++resource_count; + } + } + + put_u32(&buffer, cbuffer_count); + cbuffer_position = put_u32(&buffer, 0); + put_u32(&buffer, resource_count); + resource_position = put_u32(&buffer, 0); + put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), + target_types[profile->type])); + put_u32(&buffer, 0); /* FIXME: compilation flags */ + creator_position = put_u32(&buffer, 0); + + if (profile->major_version >= 5) + { + put_u32(&buffer, TAG_RD11); + put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ + put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ + put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */ + put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ + put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ + put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ + put_u32(&buffer, 0); /* unknown; possibly a null terminator */ + } + + /* Bound resources. 
*/ + + resources_offset = bytecode_align(&buffer); + set_u32(&buffer, resource_position, resources_offset); + + for (i = 0; i < extern_resources_count; ++i) + { + enum hlsl_regset regset; + uint32_t flags = 0; + + var = extern_resources[i]; + regset = hlsl_type_get_regset(var->data_type); + + if (var->reg_reservation.reg_type) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, sm4_resource_type(var->data_type)); + if (regset == HLSL_REGSET_SAMPLERS) + { + put_u32(&buffer, 0); + put_u32(&buffer, 0); + put_u32(&buffer, 0); + } + else + { + unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; + + put_u32(&buffer, sm4_resource_format(var->data_type)); + put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); + put_u32(&buffer, ~0u); /* FIXME: multisample count */ + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + } + put_u32(&buffer, var->regs[regset].id); + put_u32(&buffer, var->regs[regset].bind_count); + put_u32(&buffer, flags); + } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + uint32_t flags = 0; + + if (!cbuffer->reg.allocated) + continue; + + if (cbuffer->reservation.reg_type) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); + put_u32(&buffer, 0); /* return type */ + put_u32(&buffer, 0); /* dimension */ + put_u32(&buffer, 0); /* multisample count */ + put_u32(&buffer, cbuffer->reg.id); /* bind point */ + put_u32(&buffer, 1); /* bind count */ + put_u32(&buffer, flags); /* flags */ + } + + for (i = 0; i < extern_resources_count; ++i) + { + var = extern_resources[i]; + + string_offset = put_string(&buffer, var->name); + set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); + } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!cbuffer->reg.allocated) + continue; + + string_offset = put_string(&buffer, cbuffer->name); + set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); + } + + /* Buffers. */ + + cbuffers_offset = bytecode_align(&buffer); + set_u32(&buffer, cbuffer_position, cbuffers_offset); + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + unsigned int var_count = 0; + + if (!cbuffer->reg.allocated) + continue; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer) + ++var_count; + } + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, var_count); + put_u32(&buffer, 0); /* variable offset */ + put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float)); + put_u32(&buffer, 0); /* FIXME: flags */ + put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_CT_CBUFFER : D3D_CT_TBUFFER); + } + + i = 0; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!cbuffer->reg.allocated) + continue; + + string_offset = put_string(&buffer, cbuffer->name); + set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset); + } + + i = 0; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + size_t vars_start = bytecode_align(&buffer); + + if (!cbuffer->reg.allocated) + continue; + + set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer) + { + uint32_t flags = 0; + + if (var->last_read) + flags |= D3D_SVF_USED; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, var->buffer_offset * sizeof(float)); + put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); + put_u32(&buffer, flags); + put_u32(&buffer, 0); /* type */ + put_u32(&buffer, 0); /* FIXME: default value */ + + if (profile->major_version >= 5) + { + put_u32(&buffer, 0); /* texture start */ + put_u32(&buffer, 0); /* texture count */ + put_u32(&buffer, 0); /* sampler start */ + put_u32(&buffer, 0); /* sampler count */ + } + } + } + + j = 0; + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform && var->buffer == cbuffer) + { + const unsigned int var_size = (profile->major_version >= 5 ? 
10 : 6); + size_t var_offset = vars_start + j * var_size * sizeof(uint32_t); + size_t string_offset = put_string(&buffer, var->name); + + set_u32(&buffer, var_offset, string_offset); + write_sm4_type(ctx, &buffer, var->data_type); + set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); + ++j; + } + } + } + + creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(&buffer, creator_position, creator_offset); + + add_section(dxbc, TAG_RDEF, &buffer); + + vkd3d_free(extern_resources); +} + +static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) +{ + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return VKD3D_SM4_RESOURCE_TEXTURE_1D; + case HLSL_SAMPLER_DIM_2D: + return VKD3D_SM4_RESOURCE_TEXTURE_2D; + case HLSL_SAMPLER_DIM_3D: + return VKD3D_SM4_RESOURCE_TEXTURE_3D; + case HLSL_SAMPLER_DIM_CUBE: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; + case HLSL_SAMPLER_DIM_1DARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; + case HLSL_SAMPLER_DIM_2DARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; + case HLSL_SAMPLER_DIM_2DMS: + return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; + case HLSL_SAMPLER_DIM_2DMSARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; + case HLSL_SAMPLER_DIM_CUBEARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; + case HLSL_SAMPLER_DIM_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + return VKD3D_SM4_RESOURCE_BUFFER; + default: + vkd3d_unreachable(); + } +} + +struct sm4_instruction_modifier +{ + enum vkd3d_sm4_instruction_modifier type; + + union + { + struct + { + int u, v, w; + } aoffimmi; + } u; +}; + +static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod) +{ + uint32_t word = 0; + + word |= VKD3D_SM4_MODIFIER_MASK & imod->type; + + switch (imod->type) + { + case VKD3D_SM4_MODIFIER_AOFFIMMI: + assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); + assert(-8 <= imod->u.aoffimmi.v && 
imod->u.aoffimmi.v <= 7); + assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); + word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; + word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; + word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; + break; + + default: + vkd3d_unreachable(); + } + + return word; +} + +struct sm4_register +{ + enum vkd3d_sm4_register_type type; + uint32_t idx[2]; + unsigned int idx_count; + enum vkd3d_sm4_dimension dim; + uint32_t immconst_uint[4]; + unsigned int mod; +}; + +struct sm4_instruction +{ + enum vkd3d_sm4_opcode opcode; + + struct sm4_instruction_modifier modifiers[1]; + unsigned int modifier_count; + + struct sm4_dst_register + { + struct sm4_register reg; + unsigned int writemask; + } dsts[2]; + unsigned int dst_count; + + struct sm4_src_register + { + struct sm4_register reg; + enum vkd3d_sm4_swizzle_type swizzle_type; + unsigned int swizzle; + } srcs[5]; + unsigned int src_count; + + unsigned int byte_stride; + + uint32_t idx[3]; + unsigned int idx_count; +}; + +static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, + unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, + const struct hlsl_deref *deref, const struct hlsl_type *data_type) +{ + const struct hlsl_ir_var *var = deref->var; + + if (var->is_uniform) + { + enum hlsl_regset regset = hlsl_type_get_regset(data_type); + + if (regset == HLSL_REGSET_TEXTURES) + { + reg->type = VKD3D_SM4_RT_RESOURCE; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_TEXTURES); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_UAVS) + { + reg->type = VKD3D_SM5_RT_UAV; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if 
(swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_UAVS); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_SAMPLERS) + { + reg->type = VKD3D_SM4_RT_SAMPLER; + reg->dim = VKD3D_SM4_DIMENSION_NONE; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_SAMPLERS); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; + + assert(data_type->class <= HLSL_CLASS_VECTOR); + reg->type = VKD3D_SM4_RT_CONSTBUFFER; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = var->buffer->reg.id; + reg->idx[1] = offset / 4; + reg->idx_count = 2; + *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); + } + } + else if (var->is_input_semantic) + { + bool has_idx; + + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, ®->type, swizzle_type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + + if (has_idx) + { + reg->idx[0] = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); + reg->type = VKD3D_SM4_RT_INPUT; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } + } + else if (var->is_output_semantic) + { + bool has_idx; + + if 
(hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, ®->type, swizzle_type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + + if (has_idx) + { + reg->idx[0] = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + + if (reg->type == VKD3D_SM4_RT_DEPTHOUT) + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + else + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); + reg->type = VKD3D_SM4_RT_OUTPUT; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + reg->idx[0] = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } + } + else + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); + reg->type = VKD3D_SM4_RT_TEMP; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } +} + +static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, + const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) +{ + unsigned int writemask; + + sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); + if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +} + +static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask, + enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) +{ + assert(instr->reg.allocated); + reg->type = VKD3D_SM4_RT_TEMP; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = instr->reg.id; + reg->idx_count = 1; + *writemask = instr->reg.writemask; +} + +static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct 
hlsl_ir_node *instr) +{ + unsigned int swizzle_type; + + sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, instr); +} + +static void sm4_src_from_constant_value(struct sm4_src_register *src, + const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) +{ + src->swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + src->reg.type = VKD3D_SM4_RT_IMMCONST; + if (width == 1) + { + src->reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + src->reg.immconst_uint[0] = value->u[0].u; + } + else + { + unsigned int i, j = 0; + + src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; + for (i = 0; i < 4; ++i) + { + if (map_writemask & (1u << i)) + src->reg.immconst_uint[i] = value->u[j++].u; + } + } +} + +static void sm4_src_from_node(struct sm4_src_register *src, + const struct hlsl_ir_node *instr, unsigned int map_writemask) +{ + unsigned int writemask; + + if (instr->type == HLSL_IR_CONSTANT) + { + struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + + sm4_src_from_constant_value(src, &constant->value, instr->data_type->dimx, map_writemask); + return; + } + + sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr); + if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +} + +static uint32_t sm4_encode_register(const struct sm4_register *reg) +{ + return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) + | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) + | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); +} + +static uint32_t sm4_register_order(const struct sm4_register *reg) +{ + uint32_t order = 1; + if (reg->type == VKD3D_SM4_RT_IMMCONST) + order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 
4 : 1; + order += reg->idx_count; + if (reg->mod) + ++order; + return order; +} + +static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) +{ + uint32_t token = instr->opcode; + unsigned int size = 1, i, j; + + size += instr->modifier_count; + for (i = 0; i < instr->dst_count; ++i) + size += sm4_register_order(&instr->dsts[i].reg); + for (i = 0; i < instr->src_count; ++i) + size += sm4_register_order(&instr->srcs[i].reg); + size += instr->idx_count; + if (instr->byte_stride) + ++size; + + token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); + + if (instr->modifier_count > 0) + token |= VKD3D_SM4_INSTRUCTION_MODIFIER; + put_u32(buffer, token); + + for (i = 0; i < instr->modifier_count; ++i) + { + token = sm4_encode_instruction_modifier(&instr->modifiers[i]); + if (instr->modifier_count > i + 1) + token |= VKD3D_SM4_INSTRUCTION_MODIFIER; + put_u32(buffer, token); + } + + for (i = 0; i < instr->dst_count; ++i) + { + token = sm4_encode_register(&instr->dsts[i].reg); + if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) + token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; + put_u32(buffer, token); + + for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) + put_u32(buffer, instr->dsts[i].reg.idx[j]); + } + + for (i = 0; i < instr->src_count; ++i) + { + token = sm4_encode_register(&instr->srcs[i].reg); + token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; + if (instr->srcs[i].reg.mod) + token |= VKD3D_SM4_EXTENDED_OPERAND; + put_u32(buffer, token); + + if (instr->srcs[i].reg.mod) + put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) + | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); + + for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) + put_u32(buffer, instr->srcs[i].reg.idx[j]); + + if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) + { + put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); + 
if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) + { + put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); + put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); + put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); + } + } + } + + if (instr->byte_stride) + put_u32(buffer, instr->byte_stride); + + for (j = 0; j < instr->idx_count; ++j) + put_u32(buffer, instr->idx[j]); +} + +static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, + const struct hlsl_ir_node *texel_offset) +{ + struct sm4_instruction_modifier modif; + struct hlsl_ir_constant *offset; + + if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) + return false; + offset = hlsl_ir_constant(texel_offset); + + modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; + modif.u.aoffimmi.u = offset->value.u[0].i; + modif.u.aoffimmi.v = 0; + modif.u.aoffimmi.w = 0; + if (offset->node.data_type->dimx > 1) + modif.u.aoffimmi.v = offset->value.u[1].i; + if (offset->node.data_type->dimx > 2) + modif.u.aoffimmi.w = offset->value.u[2].i; + if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 + || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 + || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) + return false; + + instr->modifiers[instr->modifier_count++] = modif; + return true; +} + +static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) +{ + const struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, + + .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, + .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, + .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, + .srcs[0].reg.idx_count = 2, + .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, + .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), + .src_count = 1, + }; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +{ + unsigned int i, count = 
var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + + .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + }; + + if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + instr.opcode |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; + + for (i = 0; i < count; ++i) + { + if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue; + + instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id + i; + write_sm4_instruction(buffer, &instr); + } +} + +static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_var *var, bool uav) +{ + enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; + unsigned int i, count = var->data_type->reg_size[regset]; + struct hlsl_type *component_type; + struct sm4_instruction instr; + + component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); + + for (i = 0; i < count; ++i) + { + if (!var->objects_usage[regset][i].used) + continue; + + instr = (struct sm4_instruction) + { + .dsts[0].reg.type = uav ? 
VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, + .dsts[0].reg.idx = {var->regs[regset].id + i}, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + + .idx[0] = sm4_resource_format(component_type) * 0x1111, + .idx_count = 1, + }; + + if (uav) + { + switch (var->data_type->sampler_dim) + { + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; + instr.byte_stride = var->data_type->e.resource_format->reg_size[HLSL_REGSET_NUMERIC] * 4; + break; + default: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; + break; + } + } + else + { + instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; + } + instr.opcode |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); + + if (component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + + write_sm4_instruction(buffer, &instr); + } +} + +static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +{ + const struct hlsl_profile_info *profile = ctx->profile; + const bool output = var->is_output_semantic; + D3D_NAME usage; + bool has_idx; + + struct sm4_instruction instr = + { + .dsts[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, + .dst_count = 1, + }; + + if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) + { + if (has_idx) + { + instr.dsts[0].reg.idx[0] = var->semantic.index; + instr.dsts[0].reg.idx_count = 1; + } + else + { + instr.dsts[0].reg.idx_count = 0; + } + instr.dsts[0].writemask = (1 << var->data_type->dimx) - 1; + } + else + { + instr.dsts[0].reg.type = output ? 
VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; + instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; + instr.dsts[0].reg.idx_count = 1; + instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; + } + + if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) + instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + + hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); + if (usage == ~0u) + usage = D3D_NAME_UNDEFINED; + + if (var->is_input_semantic) + { + switch (usage) + { + case D3D_NAME_UNDEFINED: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; + break; + + case D3D_NAME_INSTANCE_ID: + case D3D_NAME_PRIMITIVE_ID: + case D3D_NAME_VERTEX_ID: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; + break; + + default: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; + break; + } + + if (profile->type == VKD3D_SHADER_TYPE_PIXEL) + { + enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; + + if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) + mode = VKD3DSIM_CONSTANT; + + instr.opcode |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + } + } + else + { + if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) + instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; + else + instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; + } + + switch (usage) + { + case D3D_NAME_COVERAGE: + case D3D_NAME_DEPTH: + case D3D_NAME_DEPTH_GREATER_EQUAL: + case D3D_NAME_DEPTH_LESS_EQUAL: + case D3D_NAME_TARGET: + case D3D_NAME_UNDEFINED: + break; + + default: + instr.idx_count = 1; + instr.idx[0] = usage; + break; + } + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) +{ + struct sm4_instruction instr = + { + .opcode 
= VKD3D_SM4_OP_DCL_TEMPS, + + .idx = {temp_count}, + .idx_count = 1, + }; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, + + .idx = {thread_count[0], thread_count[1], thread_count[2]}, + .idx_count = 3, + }; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_RET, + }; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], src, instr.dsts[0].writemask); + instr.srcs[0].reg.mod = src_mod; + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, + const struct hlsl_ir_node *src) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); + instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; + + sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, 
enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask); + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +/* dp# instructions don't map the swizzle. */ +static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, + const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); + instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; + + sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[dst_idx].writemask); + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + 
+static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, + const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, + enum hlsl_sampler_dim dim) +{ + bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); + unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + if (uav) + instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; + else + instr.opcode = multisampled ? VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; + + if (texel_offset) + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } + } + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + if (!uav) + { + /* Mipmap level is in the last component in the IR, but needs to be in the W + * component in the instruction. 
*/ + unsigned int dim_count = hlsl_sampler_dim_count(dim); + + if (dim_count == 1) + coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; + if (dim_count == 2) + coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; + } + + sm4_src_from_node(&instr.srcs[0], coords, coords_writemask); + + sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); + + instr.src_count = 2; + + if (multisampled) + { + if (sample_index->type == HLSL_IR_CONSTANT) + { + struct sm4_register *reg = &instr.srcs[2].reg; + struct hlsl_ir_constant *index; + + index = hlsl_ir_constant(sample_index); + + memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); + instr.srcs[2].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + reg->type = VKD3D_SM4_RT_IMMCONST; + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + reg->immconst_uint[0] = index->value.u[0].u; + } + else if (ctx->profile->major_version == 4 && ctx->profile->minor_version == 0) + { + hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + } + else + { + sm4_src_from_node(&instr.srcs[2], sample_index, 0); + } + + ++instr.src_count; + } + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_type *resource_type = load->resource.var->data_type; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_deref *sampler = &load->sampler; + const struct hlsl_ir_node *dst = &load->node; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + switch (load->load_type) + { + case HLSL_RESOURCE_SAMPLE: + instr.opcode = VKD3D_SM4_OP_SAMPLE; + break; + + case HLSL_RESOURCE_SAMPLE_CMP: + instr.opcode = VKD3D_SM4_OP_SAMPLE_C; + break; + + case 
HLSL_RESOURCE_SAMPLE_CMP_LZ: + instr.opcode = VKD3D_SM4_OP_SAMPLE_C_LZ; + break; + + case HLSL_RESOURCE_SAMPLE_LOD: + instr.opcode = VKD3D_SM4_OP_SAMPLE_LOD; + break; + + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + instr.opcode = VKD3D_SM4_OP_SAMPLE_B; + break; + + case HLSL_RESOURCE_SAMPLE_GRAD: + instr.opcode = VKD3D_SM4_OP_SAMPLE_GRAD; + break; + + default: + vkd3d_unreachable(); + } + + if (texel_offset) + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } + } + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 3; + + if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD + || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) + { + sm4_src_from_node(&instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); + ++instr.src_count; + } + else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) + { + sm4_src_from_node(&instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); + instr.src_count += 2; + } + else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP + || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ) + { + sm4_src_from_node(&instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); + ++instr.src_count; + } + + write_sm4_instruction(buffer, &instr); +} + +static bool type_is_float(const struct hlsl_type *type) +{ + return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; +} + +static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, + const 
struct hlsl_ir_node *arg, uint32_t mask) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_AND; + + sm4_dst_from_node(&instr.dsts[0], &expr->node); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); + instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; + instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + instr.srcs[1].reg.immconst_uint[0] = mask; + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_cast(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +{ + static const union + { + uint32_t u; + float f; + } one = { .f = 1.0 }; + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_type *dst_type = expr->node.data_type; + const struct hlsl_type *src_type = arg1->data_type; + + /* Narrowing casts were already lowered. */ + assert(src_type->dimx == dst_type->dimx); + + switch (dst_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + switch (src_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: + write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_INT: + switch (src_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, 
VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: + write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_UINT: + switch (src_type->base_type) + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: + write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); + break; + + default: + vkd3d_unreachable(); + } + break; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); + break; + + case HLSL_TYPE_BOOL: + /* Casts to bool should have already been lowered. 
*/ + default: + vkd3d_unreachable(); + } +} + +static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; + + sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_expr(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) +{ + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_ir_node *arg2 = expr->operands[1].node; + const struct hlsl_type *dst_type = expr->node.data_type; + struct vkd3d_string_buffer *dst_type_string; + + assert(expr->node.reg.allocated); + + if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) + return; + + switch (expr->op) + { + case HLSL_OP1_ABS: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP1_BIT_NOT: + assert(type_is_integer(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_CAST: + write_sm4_cast(ctx, buffer, expr); + break; + + case HLSL_OP1_COS: + assert(type_is_float(dst_type)); + write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); + break; + + case HLSL_OP1_DSX: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, 
&expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); + break; + + case HLSL_OP1_EXP2: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FLOOR: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FRACT: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOG2: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOGIC_NOT: + assert(dst_type->base_type == HLSL_TYPE_BOOL); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_NEG: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP1_REINTERPRET: + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_OP1_ROUND: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_RSQ: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); + break; + + case HLSL_OP1_SAT: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV + | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), + &expr->node, arg1, 0); + break; + + case HLSL_OP1_SIN: + assert(type_is_float(dst_type)); + 
write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); + break; + + case HLSL_OP1_SQRT: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_TRUNC: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); + break; + + case HLSL_OP2_ADD: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_BIT_AND: + assert(type_is_integer(dst_type)); + write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_BIT_OR: + assert(type_is_integer(dst_type)); + write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_BIT_XOR: + assert(type_is_integer(dst_type)); + write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_DIV: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: + write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_DOT: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + switch (arg1->data_type->dimx) + { + case 4: + write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); + break; + + case 3: + write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); + break; + + case 2: + 
write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); + break; + + case 1: + default: + vkd3d_unreachable(); + } + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_EQUAL: + { + const struct hlsl_type *src_type = arg1->data_type; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + case HLSL_OP2_GEQUAL: + { + const struct hlsl_type *src_type = arg1->data_type; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + case HLSL_OP2_LESS: + { + const struct hlsl_type *src_type = arg1->data_type; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, 
&expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + case HLSL_OP2_LOGIC_AND: + assert(dst_type->base_type == HLSL_TYPE_BOOL); + write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LOGIC_OR: + assert(dst_type->base_type == HLSL_TYPE_BOOL); + write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LSHIFT: + assert(type_is_integer(dst_type)); + assert(dst_type->base_type != HLSL_TYPE_BOOL); + write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_MAX: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_MIN: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_MOD: + switch (dst_type->base_type) + { + case HLSL_TYPE_UINT: + write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); + } + 
break; + + case HLSL_OP2_MUL: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + /* Using IMUL instead of UMUL because we're taking the low + * bits, and the native compiler generates IMUL. */ + write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_NEQUAL: + { + const struct hlsl_type *src_type = arg1->data_type; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", + debug_hlsl_type(ctx, src_type)); + break; + } + break; + } + + case HLSL_OP2_RSHIFT: + assert(type_is_integer(dst_type)); + assert(dst_type->base_type != HLSL_TYPE_BOOL); + write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, + &expr->node, arg1, arg2); + break; + + default: + hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); + } + + hlsl_release_string_buffer(ctx, dst_type_string); +} + +static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, + .src_count = 1, + }; + + assert(iff->condition.node->data_type->dimx == 1); + + sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); + write_sm4_instruction(buffer, &instr); + + write_sm4_block(ctx, buffer, &iff->then_block); + + if (!list_empty(&iff->else_block.instrs)) + { + instr.opcode = VKD3D_SM4_OP_ELSE; + instr.src_count = 0; + write_sm4_instruction(buffer, &instr); + + write_sm4_block(ctx, buffer, &iff->else_block); + } + + instr.opcode = VKD3D_SM4_OP_ENDIF; + instr.src_count = 0; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_jump(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) +{ + struct sm4_instruction instr = {0}; + + switch (jump->type) + { + case HLSL_IR_JUMP_BREAK: + instr.opcode = VKD3D_SM4_OP_BREAK; + break; + + case HLSL_IR_JUMP_DISCARD: + { + struct sm4_register *reg = &instr.srcs[0].reg; + + instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; + + memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); + instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; + instr.src_count = 1; + reg->type = VKD3D_SM4_RT_IMMCONST; + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + reg->immconst_uint[0] = ~0u; + + break; + } + + case HLSL_IR_JUMP_RETURN: + vkd3d_unreachable(); + + default: + hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); + return; + } + + write_sm4_instruction(buffer, &instr); +} + +/* Does this variable's data come directly from the API user, rather than being + * 
temporary or from a previous shader stage? + * I.e. is it a uniform or VS input? */ +static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) +{ + if (var->is_uniform) + return true; + + return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; +} + +static void write_sm4_load(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) +{ + const struct hlsl_type *type = load->node.data_type; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + + sm4_dst_from_node(&instr.dsts[0], &load->node); + instr.dst_count = 1; + + assert(type->class <= HLSL_CLASS_LAST_NUMERIC); + if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(ctx, load->src.var)) + { + struct hlsl_constant_value value; + + /* Uniform bools can be specified as anything, but internal bools always + * have 0 for false and ~0 for true. Normalize that here. */ + + instr.opcode = VKD3D_SM4_OP_MOVC; + + sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + + memset(&value, 0xff, sizeof(value)); + sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); + memset(&value, 0, sizeof(value)); + sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].writemask); + instr.src_count = 3; + } + else + { + instr.opcode = VKD3D_SM4_OP_MOV; + + sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + instr.src_count = 1; + } + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_loop(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_LOOP, + }; + + write_sm4_instruction(buffer, &instr); + + write_sm4_block(ctx, buffer, &loop->body); + + instr.opcode = VKD3D_SM4_OP_ENDLOOP; + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_gather(struct hlsl_ctx *ctx, struct 
vkd3d_bytecode_buffer *buffer, + const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_deref *sampler, + const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) +{ + struct sm4_src_register *src; + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + + instr.opcode = VKD3D_SM4_OP_GATHER4; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); + + if (texel_offset) + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { + if (ctx->profile->major_version < 5) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); + return; + } + instr.opcode = VKD3D_SM5_OP_GATHER4_PO; + sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); + } + } + + sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); + + src = &instr.srcs[instr.src_count++]; + sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; + src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; + src->swizzle = swizzle; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_resource_load(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) +{ + const struct hlsl_type *resource_type = load->resource.var->data_type; + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *sample_index = load->sample_index.node; + const struct hlsl_ir_node *coords = load->coords.node; + + if (!hlsl_type_is_resource(resource_type)) + { + hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); + return; + } + + 
if (load->sampler.var) + { + const struct hlsl_type *sampler_type = load->sampler.var->data_type; + + if (!hlsl_type_is_resource(sampler_type)) + { + hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); + return; + } + + if (!load->sampler.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); + return; + } + } + + if (!load->resource.var->is_uniform) + { + hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); + return; + } + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: + write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, + coords, sample_index, texel_offset, load->sampling_dim); + break; + + case HLSL_RESOURCE_SAMPLE: + case HLSL_RESOURCE_SAMPLE_CMP: + case HLSL_RESOURCE_SAMPLE_CMP_LZ: + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + case HLSL_RESOURCE_SAMPLE_GRAD: + if (!load->sampler.var) + { + hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); + return; + } + write_sm4_sample(ctx, buffer, load); + break; + + case HLSL_RESOURCE_GATHER_RED: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_GREEN: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_BLUE: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_ALPHA: + write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, + &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); + break; + } +} + +static void write_sm4_resource_store(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store 
*store) +{ + const struct hlsl_type *resource_type = store->resource.var->data_type; + + if (!hlsl_type_is_resource(resource_type)) + { + hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); + return; + } + + if (!store->resource.var->is_uniform) + { + hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); + return; + } + + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.\n"); + return; + } + + write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); +} + +static void write_sm4_store(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) +{ + const struct hlsl_ir_node *rhs = store->rhs.node; + struct sm4_instruction instr; + unsigned int writemask; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + + sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); + instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); + instr.src_count = 1; + + write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_swizzle(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) +{ + struct sm4_instruction instr; + unsigned int writemask; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + + sm4_dst_from_node(&instr.dsts[0], &swizzle->node); + instr.dst_count = 1; + + sm4_register_from_node(&instr.srcs[0].reg, &writemask, &instr.srcs[0].swizzle_type, swizzle->val.node); + instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), + swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); + instr.src_count = 1; + + 
write_sm4_instruction(buffer, &instr); +} + +static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_block *block) +{ + const struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class == HLSL_CLASS_MATRIX) + { + hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); + break; + } + else if (instr->data_type->class == HLSL_CLASS_OBJECT) + { + hlsl_fixme(ctx, &instr->loc, "Object copy."); + break; + } + + assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); + + if (!instr->reg.allocated) + { + assert(instr->type == HLSL_IR_CONSTANT); + continue; + } + } + + switch (instr->type) + { + case HLSL_IR_CALL: + case HLSL_IR_CONSTANT: + vkd3d_unreachable(); + + case HLSL_IR_EXPR: + write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); + break; + + case HLSL_IR_IF: + write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); + break; + + case HLSL_IR_JUMP: + write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); + break; + + case HLSL_IR_LOAD: + write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: + write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_RESOURCE_STORE: + write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); + break; + + case HLSL_IR_LOOP: + write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); + break; + + case HLSL_IR_STORE: + write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWIZZLE: + write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + } + } +} + +static void write_sm4_shdr(struct hlsl_ctx *ctx, + const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) +{ + const struct hlsl_profile_info *profile = 
ctx->profile; + const struct hlsl_ir_var **extern_resources; + struct vkd3d_bytecode_buffer buffer = {0}; + unsigned int extern_resources_count, i; + const struct hlsl_buffer *cbuffer; + const struct hlsl_ir_var *var; + size_t token_count_position; + + static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = + { + VKD3D_SM4_PS, + VKD3D_SM4_VS, + VKD3D_SM4_GS, + VKD3D_SM5_HS, + VKD3D_SM5_DS, + VKD3D_SM5_CS, + 0, /* EFFECT */ + 0, /* TEXTURE */ + VKD3D_SM4_LIB, + }; + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + + put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); + token_count_position = put_u32(&buffer, 0); + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) + write_sm4_dcl_constant_buffer(&buffer, cbuffer); + } + + for (i = 0; i < extern_resources_count; ++i) + { + enum hlsl_regset regset; + + var = extern_resources[i]; + regset = hlsl_type_get_regset(var->data_type); + + if (regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&buffer, var); + else if (regset == HLSL_REGSET_TEXTURES) + write_sm4_dcl_textures(ctx, &buffer, var, false); + else if (regset == HLSL_REGSET_UAVS) + write_sm4_dcl_textures(ctx, &buffer, var, true); + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) + write_sm4_dcl_semantic(ctx, &buffer, var); + } + + if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) + write_sm4_dcl_thread_group(&buffer, ctx->thread_count); + + if (ctx->temp_count) + write_sm4_dcl_temps(&buffer, ctx->temp_count); + + write_sm4_block(ctx, &buffer, &entry_func->body); + + write_sm4_ret(&buffer); + + set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); + + add_section(dxbc, TAG_SHDR, &buffer); + + vkd3d_free(extern_resources); +} + +int 
hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +{ + struct dxbc_writer dxbc; + size_t i; + int ret; + + dxbc_writer_init(&dxbc); + + write_sm4_signature(ctx, &dxbc, false); + write_sm4_signature(ctx, &dxbc, true); + write_sm4_rdef(ctx, &dxbc); + write_sm4_shdr(ctx, entry_func, &dxbc); + + if (!(ret = ctx->result)) + ret = dxbc_writer_write(&dxbc, out); + for (i = 0; i < dxbc.section_count; ++i) + vkd3d_shader_free_shader_code(&dxbc.sections[i].data); + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index c9c15f01155..343fdb2252e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -22,6 +22,8 @@ #include #include +/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ + static inline int char_to_int(char c) { if ('0' <= c && c <= '9') @@ -338,22 +340,35 @@ void vkd3d_shader_error(struct vkd3d_shader_message_context *context, const stru va_end(args); } +size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer) +{ + size_t aligned_size = align(buffer->size, 4); + + if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, aligned_size, 1)) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return aligned_size; + } + + memset(buffer->data + buffer->size, 0xab, aligned_size - buffer->size); + buffer->size = aligned_size; + return aligned_size; +} + size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size) { - size_t aligned_size = align(size, 4); - size_t offset = buffer->size; + size_t offset = bytecode_align(buffer); if (buffer->status) return offset; - if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, offset + aligned_size, 1)) + if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, offset + size, 1)) { buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; return offset; } memcpy(buffer->data 
+ offset, bytes, size); - memset(buffer->data + offset + size, 0xab, aligned_size - size); - buffer->size = offset + aligned_size; + buffer->size = offset + size; return offset; } @@ -1070,7 +1085,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info if (TRACE_ON()) { - vkd3d_shader_trace(parser); + vkd3d_shader_trace(&parser->instructions, &parser->shader_version); } for (i = 0; i < parser->instructions.count; ++i) @@ -1167,75 +1182,73 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char return ret; } -static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, +static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; + struct vkd3d_glsl_generator *glsl_generator; struct vkd3d_shader_compile_info scan_info; - struct spirv_compiler *spirv_compiler; - struct vkd3d_shader_parser *parser; int ret; + vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); + scan_info = *compile_info; scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; scan_descriptor_info.next = scan_info.next; scan_info.next = &scan_descriptor_info; - if ((ret = scan_dxbc(&scan_info, message_context)) < 0) + if ((ret = scan_with_parser(&scan_info, message_context, parser)) < 0) return ret; - if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) + switch (compile_info->target_type) { - WARN("Failed to initialise shader parser.\n"); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return ret; - } + case VKD3D_SHADER_TARGET_D3D_ASM: + ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); + break; - 
vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); + case VKD3D_SHADER_TARGET_GLSL: + if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, + message_context, &parser->location))) + { + ERR("Failed to create GLSL generator.\n"); + vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return VKD3D_ERROR; + } - if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) - { - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); - vkd3d_shader_parser_destroy(parser); - return ret; - } + ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); + vkd3d_glsl_generator_destroy(glsl_generator); + break; - if (compile_info->target_type == VKD3D_SHADER_TARGET_GLSL) - { - struct vkd3d_glsl_generator *glsl_generator; + case VKD3D_SHADER_TARGET_SPIRV_BINARY: + case VKD3D_SHADER_TARGET_SPIRV_TEXT: + ret = spirv_compile(parser, &scan_descriptor_info, compile_info, out, message_context); + break; - if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->shader_version, - message_context, &parser->location))) - { - ERR("Failed to create GLSL generator.\n"); - vkd3d_shader_parser_destroy(parser); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return VKD3D_ERROR; - } + default: + /* Validation should prevent us from reaching this. 
*/ + assert(0); + } - ret = vkd3d_glsl_generator_generate(glsl_generator, parser, out); + vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); + return ret; +} - vkd3d_glsl_generator_destroy(glsl_generator); - vkd3d_shader_parser_destroy(parser); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return ret; - } +static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_parser *parser; + int ret; - if (!(spirv_compiler = spirv_compiler_create(&parser->shader_version, &parser->shader_desc, - compile_info, &scan_descriptor_info, message_context, &parser->location))) + if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) { - ERR("Failed to create DXBC compiler.\n"); - vkd3d_shader_parser_destroy(parser); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); - return VKD3D_ERROR; + WARN("Failed to initialise shader parser.\n"); + return ret; } - ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); + ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); - spirv_compiler_destroy(spirv_compiler); vkd3d_shader_parser_destroy(parser); - vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); return ret; } @@ -1270,7 +1283,7 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ if (compile_info->target_type == VKD3D_SHADER_TARGET_D3D_ASM) { - ret = vkd3d_dxbc_binary_to_text(parser, compile_info, out); + ret = vkd3d_dxbc_binary_to_text(&parser->instructions, &parser->shader_version, compile_info, out); vkd3d_shader_parser_destroy(parser); return ret; } @@ -1388,10 +1401,54 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu desc->version = 0; } +static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, + const 
struct shader_signature *src) +{ + unsigned int i; + + signature->element_count = src->element_count; + if (!src->elements) + { + assert(!signature->element_count); + signature->elements = NULL; + return true; + } + + if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) + return false; + + for (i = 0; i < signature->element_count; ++i) + { + struct vkd3d_shader_signature_element *d = &signature->elements[i]; + struct signature_element *e = &src->elements[i]; + + d->semantic_name = e->semantic_name; + d->semantic_index = e->semantic_index; + d->stream_index = e->stream_index; + d->sysval_semantic = e->sysval_semantic; + d->component_type = e->component_type; + d->register_index = e->register_index; + if (e->register_count > 1) + FIXME("Arrayed elements are not supported yet.\n"); + d->mask = e->mask; + d->used_mask = e->used_mask; + d->min_precision = e->min_precision; + } + + return true; +} + +void shader_signature_cleanup(struct shader_signature *signature) +{ + vkd3d_free(signature->elements); + signature->elements = NULL; +} + int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_signature *signature, char **messages) { struct vkd3d_shader_message_context message_context; + struct shader_signature shader_signature; int ret; TRACE("dxbc {%p, %zu}, signature %p, messages %p.\n", dxbc->code, dxbc->size, signature, messages); @@ -1400,13 +1457,17 @@ int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, *messages = NULL; vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); - ret = shader_parse_input_signature(dxbc, &message_context, signature); + ret = shader_parse_input_signature(dxbc, &message_context, &shader_signature); vkd3d_shader_message_context_trace_messages(&message_context); if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) ret = VKD3D_ERROR_OUT_OF_MEMORY; 
vkd3d_shader_message_context_cleanup(&message_context); + if (!vkd3d_shader_signature_from_shader_signature(signature, &shader_signature)) + ret = VKD3D_ERROR_OUT_OF_MEMORY; + + shader_signature_cleanup(&shader_signature); return ret; } @@ -1642,6 +1703,84 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins return true; } +static struct vkd3d_shader_src_param *shader_instruction_array_clone_src_params( + struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_src_param *params, + unsigned int count); + +static bool shader_register_clone_relative_addresses(struct vkd3d_shader_register *reg, + struct vkd3d_shader_instruction_array *instructions) +{ + unsigned int i; + + for (i = 0; i < reg->idx_count; ++i) + { + if (!reg->idx[i].rel_addr) + continue; + + if (!(reg->idx[i].rel_addr = shader_instruction_array_clone_src_params(instructions, reg->idx[i].rel_addr, 1))) + return false; + } + + return true; +} + +static struct vkd3d_shader_dst_param *shader_instruction_array_clone_dst_params( + struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_dst_param *params, + unsigned int count) +{ + struct vkd3d_shader_dst_param *dst_params; + unsigned int i; + + if (!(dst_params = shader_dst_param_allocator_get(&instructions->dst_params, count))) + return NULL; + + memcpy(dst_params, params, count * sizeof(*params)); + for (i = 0; i < count; ++i) + { + if (!shader_register_clone_relative_addresses(&dst_params[i].reg, instructions)) + return NULL; + } + + return dst_params; +} + +static struct vkd3d_shader_src_param *shader_instruction_array_clone_src_params( + struct vkd3d_shader_instruction_array *instructions, const struct vkd3d_shader_src_param *params, + unsigned int count) +{ + struct vkd3d_shader_src_param *src_params; + unsigned int i; + + if (!(src_params = shader_src_param_allocator_get(&instructions->src_params, count))) + return NULL; + + memcpy(src_params, params, count * sizeof(*params)); 
+ for (i = 0; i < count; ++i) + { + if (!shader_register_clone_relative_addresses(&src_params[i].reg, instructions)) + return NULL; + } + + return src_params; +} + +/* NOTE: Immediate constant buffers are not cloned, so the source must not be destroyed while the + * destination is in use. This seems like a reasonable requirement given how this is currently used. */ +bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, + unsigned int dst, unsigned int src) +{ + struct vkd3d_shader_instruction *ins = &instructions->elements[dst]; + + *ins = instructions->elements[src]; + + if (ins->dst_count && ins->dst && !(ins->dst = shader_instruction_array_clone_dst_params(instructions, + ins->dst, ins->dst_count))) + return false; + + return !ins->src_count || !!(ins->src = shader_instruction_array_clone_src_params(instructions, + ins->src, ins->src_count)); +} + void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions) { unsigned int i; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 79be999adf9..406d53a3391 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -49,7 +49,7 @@ #include "vkd3d_common.h" #include "vkd3d_memory.h" #include "vkd3d_shader.h" -#include "wine/list.h" +#include "list.h" #include #include @@ -74,6 +74,13 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_TPF_MISMATCHED_CF = 1000, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_RANGE = 1001, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY = 1002, + VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_INDEX_COUNT = 1003, + VKD3D_SHADER_ERROR_TPF_TOO_MANY_REGISTERS = 1004, + VKD3D_SHADER_ERROR_TPF_INVALID_IO_REGISTER = 1005, + VKD3D_SHADER_ERROR_TPF_INVALID_INDEX_RANGE_DCL = 1006, + + VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, + VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301, 
VKD3D_SHADER_ERROR_SPV_DESCRIPTOR_BINDING_NOT_FOUND = 2000, VKD3D_SHADER_ERROR_SPV_INVALID_REGISTER_TYPE = 2001, @@ -125,6 +132,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT = 5023, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, + VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -223,6 +231,7 @@ enum vkd3d_shader_opcode VKD3DSIH_DEQ, VKD3DSIH_DFMA, VKD3DSIH_DGE, + VKD3DSIH_DISCARD, VKD3DSIH_DIV, VKD3DSIH_DLT, VKD3DSIH_DMAX, @@ -675,6 +684,7 @@ struct vkd3d_shader_register bool non_uniform; enum vkd3d_data_type data_type; struct vkd3d_shader_register_index idx[3]; + unsigned int idx_count; enum vkd3d_immconst_type immconst_type; union { @@ -774,13 +784,36 @@ enum vkd3d_shader_input_sysval_semantic VKD3D_SIV_LINE_DENSITY_TESS_FACTOR = 22, }; +struct signature_element +{ + unsigned int sort_index; + const char *semantic_name; + unsigned int semantic_index; + unsigned int stream_index; + enum vkd3d_shader_sysval_semantic sysval_semantic; + enum vkd3d_shader_component_type component_type; + unsigned int register_index; + unsigned int register_count; + unsigned int mask; + unsigned int used_mask; + enum vkd3d_shader_minimum_precision min_precision; +}; + +struct shader_signature +{ + struct signature_element *elements; + unsigned int element_count; +}; + +void shader_signature_cleanup(struct shader_signature *signature); + struct vkd3d_shader_desc { const uint32_t *byte_code; size_t byte_code_size; - struct vkd3d_shader_signature input_signature; - struct vkd3d_shader_signature output_signature; - struct vkd3d_shader_signature patch_constant_signature; + struct shader_signature input_signature; + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; }; struct vkd3d_shader_register_semantic @@ -927,6 +960,11 @@ static inline bool 
vkd3d_shader_register_is_output(const struct vkd3d_shader_reg return reg->type == VKD3DSPR_OUTPUT || reg->type == VKD3DSPR_COLOROUT; } +static inline bool vkd3d_shader_register_is_patch_constant(const struct vkd3d_shader_register *reg) +{ + return reg->type == VKD3DSPR_PATCHCONST; +} + struct vkd3d_shader_location { const char *source_name; @@ -981,6 +1019,8 @@ bool shader_instruction_array_init(struct vkd3d_shader_instruction_array *instru bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *instructions, unsigned int reserve); bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *instructions, struct vkd3d_shader_immediate_constant_buffer *icb); +bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, + unsigned int dst, unsigned int src); void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); struct vkd3d_shader_parser @@ -991,10 +1031,8 @@ struct vkd3d_shader_parser struct vkd3d_shader_desc shader_desc; struct vkd3d_shader_version shader_version; - const uint32_t *ptr; const struct vkd3d_shader_parser_ops *ops; struct vkd3d_shader_instruction_array instructions; - size_t instruction_idx; }; struct vkd3d_shader_parser_ops @@ -1028,7 +1066,8 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse parser->ops->parser_destroy(parser); } -void vkd3d_shader_trace(struct vkd3d_shader_parser *parser); +void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version); const char *shader_get_type_prefix(enum vkd3d_shader_type type); @@ -1044,8 +1083,9 @@ struct vkd3d_string_buffer_cache size_t count, max_count, capacity; }; -enum vkd3d_result vkd3d_dxbc_binary_to_text(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out); +enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct 
vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out); void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); @@ -1067,6 +1107,8 @@ struct vkd3d_bytecode_buffer int status; }; +/* Align to the next 4-byte offset, and return that offset. */ +size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer); size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size); void set_u32(struct vkd3d_bytecode_buffer *buffer, size_t offset, uint32_t value); @@ -1128,8 +1170,10 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi void free_shader_desc(struct vkd3d_shader_desc *desc); +int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc); int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_signature *signature); + struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); struct vkd3d_glsl_generator; @@ -1141,16 +1185,10 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); #define SPIRV_MAX_SRC_COUNT 6 -struct spirv_compiler; - -struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, - const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, +int spirv_compile(struct vkd3d_shader_parser *parser, const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location 
*location); -int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, - struct vkd3d_shader_code *spirv); -void spirv_compiler_destroy(struct spirv_compiler *compiler); + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]); @@ -1202,6 +1240,14 @@ static inline enum vkd3d_data_type vkd3d_data_type_from_component_type( } } +enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, + unsigned int index); + +static inline enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_shader_sysval_semantic sysval) +{ + return vkd3d_siv_from_sysval_indexed(sysval, 0); +} + static inline unsigned int vkd3d_write_mask_get_component_idx(DWORD write_mask) { unsigned int i; @@ -1323,4 +1369,11 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void void dxbc_writer_init(struct dxbc_writer *dxbc); int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); +enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *instructions); +enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( + struct vkd3d_shader_instruction_array *instructions, const struct shader_signature *input_signature); +enum vkd3d_result instruction_array_normalise_io_registers(struct vkd3d_shader_instruction_array *instructions, + enum vkd3d_shader_type shader_type, struct shader_signature *input_signature, + struct shader_signature *output_signature, struct shader_signature *patch_constant_signature); + #endif /* __VKD3D_SHADER_PRIVATE_H */ diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index 6eddcfa2d14..32439eec7eb 100644 --- 
a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -1437,7 +1437,7 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( pool_desc.pNext = NULL; pool_desc.flags = 0; pool_desc.maxSets = 512; - pool_desc.poolSizeCount = ARRAY_SIZE(device->vk_pool_sizes); + pool_desc.poolSizeCount = device->vk_pool_count; pool_desc.pPoolSizes = device->vk_pool_sizes; if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0) { @@ -2463,6 +2463,8 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, memset(list->so_counter_buffers, 0, sizeof(list->so_counter_buffers)); memset(list->so_counter_buffer_offsets, 0, sizeof(list->so_counter_buffer_offsets)); + list->descriptor_heap_count = 0; + ID3D12GraphicsCommandList2_SetPipelineState(iface, initial_pipeline_state); } @@ -2720,28 +2722,31 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des unsigned int index, bool use_array) { uint32_t descriptor_range_magic = range->descriptor_magic; - const struct vkd3d_view *view = descriptor->s.u.view_info.view; + union d3d12_desc_object u = descriptor->s.u; uint32_t vk_binding = range->binding; + VkDescriptorType vk_descriptor_type; uint32_t set = range->set; - if (descriptor->s.magic != descriptor_range_magic) + if (!u.header || u.header->magic != descriptor_range_magic) return false; + vk_descriptor_type = u.header->vk_descriptor_type; + vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_descriptor_write->pNext = NULL; vk_descriptor_write->dstSet = vk_descriptor_sets[set]; vk_descriptor_write->dstBinding = use_array ? vk_binding : vk_binding + index; vk_descriptor_write->dstArrayElement = use_array ? 
index : 0; vk_descriptor_write->descriptorCount = 1; - vk_descriptor_write->descriptorType = descriptor->s.vk_descriptor_type; + vk_descriptor_write->descriptorType = vk_descriptor_type; vk_descriptor_write->pImageInfo = NULL; vk_descriptor_write->pBufferInfo = NULL; vk_descriptor_write->pTexelBufferView = NULL; - switch (descriptor->s.magic) + switch (u.header->magic) { case VKD3D_DESCRIPTOR_MAGIC_CBV: - vk_descriptor_write->pBufferInfo = &descriptor->s.u.vk_cbv_info; + vk_descriptor_write->pBufferInfo = &u.cb_desc->vk_cbv_info; break; case VKD3D_DESCRIPTOR_MAGIC_SRV: @@ -2752,8 +2757,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des * in pairs in one set. */ if (range->descriptor_count == UINT_MAX) { - if (descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - && descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; vk_descriptor_write->dstBinding = 0; @@ -2763,21 +2768,21 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des { if (!use_array) vk_descriptor_write->dstBinding = vk_binding + 2 * index; - if (descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - && descriptor->s.vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) ++vk_descriptor_write->dstBinding; } - if (descriptor->s.vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - || descriptor->s.vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + || vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) { - 
vk_descriptor_write->pTexelBufferView = &view->u.vk_buffer_view; + vk_descriptor_write->pTexelBufferView = &u.view->v.u.vk_buffer_view; } else { vk_image_info->sampler = VK_NULL_HANDLE; - vk_image_info->imageView = view->u.vk_image_view; - vk_image_info->imageLayout = descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_SRV + vk_image_info->imageView = u.view->v.u.vk_image_view; + vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; vk_descriptor_write->pImageInfo = vk_image_info; @@ -2785,7 +2790,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break; case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: - vk_image_info->sampler = view->u.vk_sampler; + vk_image_info->sampler = u.view->v.u.vk_sampler; vk_image_info->imageView = VK_NULL_HANDLE; vk_image_info->imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -2793,7 +2798,7 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des break; default: - ERR("Invalid descriptor %#x.\n", descriptor->s.magic); + ERR("Invalid descriptor %#x.\n", u.header->magic); return false; } @@ -2847,6 +2852,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list for (j = 0; j < descriptor_count; ++j, ++descriptor) { unsigned int register_idx = range->base_register_idx + j; + union d3d12_desc_object u = descriptor->s.u; + VkBufferView vk_counter_view; + + vk_counter_view = (u.header && u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV) + ? u.view->v.vk_counter_view : VK_NULL_HANDLE; /* Track UAV counters. 
*/ if (range->descriptor_magic == VKD3D_DESCRIPTOR_MAGIC_UAV) @@ -2856,8 +2866,6 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list if (state->uav_counters.bindings[k].register_space == range->register_space && state->uav_counters.bindings[k].register_index == register_idx) { - VkBufferView vk_counter_view = descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV - ? descriptor->s.u.view_info.view->vk_counter_view : VK_NULL_HANDLE; if (bindings->vk_uav_counter_views[k] != vk_counter_view) bindings->uav_counters_dirty = true; bindings->vk_uav_counter_views[k] = vk_counter_view; @@ -2867,7 +2875,7 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list } /* Not all descriptors are necessarily populated if the range is unbounded. */ - if (descriptor->s.magic == VKD3D_DESCRIPTOR_MAGIC_FREE) + if (!u.header) continue; if (!vk_write_descriptor_set_from_d3d12_desc(current_descriptor_write, current_image_info, @@ -3153,6 +3161,30 @@ static void d3d12_command_list_update_descriptor_tables(struct d3d12_command_lis } } +static bool contains_heap(struct d3d12_descriptor_heap **heap_array, unsigned int count, + const struct d3d12_descriptor_heap *query) +{ + unsigned int i; + + for (i = 0; i < count; ++i) + if (heap_array[i] == query) + return true; + return false; +} + +static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) +{ + struct d3d12_device *device = list->device; + unsigned int i; + + for (i = 0; i < list->descriptor_heap_count; ++i) + { + vkd3d_mutex_lock(&list->descriptor_heaps[i]->vk_sets_mutex); + d3d12_desc_flush_vk_heap_updates_locked(list->descriptor_heaps[i], device); + vkd3d_mutex_unlock(&list->descriptor_heaps[i]->vk_sets_mutex); + } +} + static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) { @@ -3177,10 +3209,18 @@ static void 
d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *l bindings->sampler_heap_id = heap->serial_id; } - /* These sets can be shared across multiple command lists, and therefore binding must - * be synchronised. On an experimental branch in which caching of Vk descriptor writes - * greatly increased the chance of multiple threads arriving here at the same time, - * GRID 2019 crashed without the mutex lock. */ + if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) + { + if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) + { + /* Descriptors can be written after binding. */ + FIXME("Flushing descriptor updates while list %p is not closed.\n", list); + command_list_flush_vk_heap_updates(list); + list->descriptor_heap_count = 0; + } + list->descriptor_heaps[list->descriptor_heap_count++] = heap; + } + vkd3d_mutex_lock(&heap->vk_sets_mutex); for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) @@ -3963,10 +4003,12 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo vk_viewports[i].minDepth = viewports[i].MinDepth; vk_viewports[i].maxDepth = viewports[i].MaxDepth; - if (!vk_viewports[i].width || !vk_viewports[i].height) + if (vk_viewports[i].width <= 0.0f) { - FIXME_ONCE("Invalid viewport %u, ignoring RSSetViewports().\n", i); - return; + /* Vulkan does not support width <= 0 */ + FIXME_ONCE("Setting invalid viewport %u to zero height.\n", i); + vk_viewports[i].width = 1.0f; + vk_viewports[i].height = 0.0f; } } @@ -4481,11 +4523,20 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, root_parameter = root_signature_get_root_descriptor(root_signature, index); assert(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); - resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address); - buffer_info.buffer = resource->u.vk_buffer; - buffer_info.offset = gpu_address - resource->gpu_address; - 
buffer_info.range = resource->desc.Width - buffer_info.offset; - buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange); + if (gpu_address) + { + resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address); + buffer_info.buffer = resource->u.vk_buffer; + buffer_info.offset = gpu_address - resource->gpu_address; + buffer_info.range = resource->desc.Width - buffer_info.offset; + buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange); + } + else + { + buffer_info.buffer = list->device->null_resources.vk_buffer; + buffer_info.offset = 0; + buffer_info.range = VK_WHOLE_SIZE; + } if (vk_info->KHR_push_descriptor) { @@ -4547,13 +4598,13 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li assert(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); /* FIXME: Re-use buffer views. */ - if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, &vk_buffer_view)) + if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, root_parameter->parameter_type, &vk_buffer_view)) { ERR("Failed to create buffer view.\n"); return; } - if (!(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) + if (vk_buffer_view && !(d3d12_command_allocator_add_buffer_view(list->allocator, vk_buffer_view))) { ERR("Failed to add buffer view.\n"); VK_CALL(vkDestroyBufferView(vk_device, vk_buffer_view, NULL)); @@ -4644,6 +4695,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics WARN("Ignoring NULL index buffer view.\n"); return; } + if (!view->BufferLocation) + { + WARN("Ignoring index buffer location 0.\n"); + return; + } vk_procs = &list->device->vk_procs; @@ -4844,7 +4900,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi WARN("Failed to add view.\n"); } - list->rtvs[i] = view->u.vk_image_view; + list->rtvs[i] = view->v.u.vk_image_view; list->fb_width = 
max(list->fb_width, rtv_desc->width); list->fb_height = max(list->fb_height, rtv_desc->height); list->fb_layer_count = max(list->fb_layer_count, rtv_desc->layer_count); @@ -4868,7 +4924,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12Graphi list->dsv = VK_NULL_HANDLE; } - list->dsv = view->u.vk_image_view; + list->dsv = view->v.u.vk_image_view; list->fb_width = max(list->fb_width, dsv_desc->width); list->fb_height = max(list->fb_height, dsv_desc->height); list->fb_layer_count = max(list->fb_layer_count, dsv_desc->layer_count); @@ -4960,7 +5016,7 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, fb_desc.flags = 0; fb_desc.renderPass = vk_render_pass; fb_desc.attachmentCount = 1; - fb_desc.pAttachments = &view->u.vk_image_view; + fb_desc.pAttachments = &view->v.u.vk_image_view; fb_desc.width = width; fb_desc.height = height; fb_desc.layers = layer_count; @@ -5163,13 +5219,14 @@ static void vkd3d_uav_clear_state_get_image_pipeline(const struct vkd3d_uav_clea } static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, - struct d3d12_resource *resource, struct vkd3d_view *view, const VkClearColorValue *clear_colour, + struct d3d12_resource *resource, struct vkd3d_view *descriptor, const VkClearColorValue *clear_colour, unsigned int rect_count, const D3D12_RECT *rects) { const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; unsigned int i, miplevel_idx, layer_count; struct vkd3d_uav_clear_pipeline pipeline; struct vkd3d_uav_clear_args clear_args; + const struct vkd3d_resource_view *view; VkDescriptorImageInfo image_info; D3D12_RECT full_rect, curr_rect; VkWriteDescriptorSet write_set; @@ -5181,8 +5238,9 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, d3d12_command_list_invalidate_bindings(list, list->state); d3d12_command_list_invalidate_root_parameters(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); - if (!d3d12_command_allocator_add_view(list->allocator, 
view)) + if (!d3d12_command_allocator_add_view(list->allocator, descriptor)) WARN("Failed to add view.\n"); + view = &descriptor->v; clear_args.colour = *clear_colour; @@ -5295,10 +5353,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID const UINT values[4], UINT rect_count, const D3D12_RECT *rects) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); + struct vkd3d_view *descriptor, *uint_view = NULL; struct d3d12_device *device = list->device; - struct vkd3d_view *view, *uint_view = NULL; struct vkd3d_texture_view_desc view_desc; const struct vkd3d_format *uint_format; + const struct vkd3d_resource_view *view; struct d3d12_resource *resource_impl; VkClearColorValue colour; @@ -5306,7 +5365,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); resource_impl = unsafe_impl_from_ID3D12Resource(resource); - view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view_info.view; + if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) + return; + view = &descriptor->v; memcpy(colour.uint32, values, sizeof(colour.uint32)); if (view->format->type != VKD3D_FORMAT_TYPE_UINT) @@ -5320,8 +5381,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID if (d3d12_resource_is_buffer(resource_impl)) { - if (!vkd3d_create_buffer_view(device, resource_impl->u.vk_buffer, uint_format, - view->info.buffer.offset, view->info.buffer.size, &uint_view)) + if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, + uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) { ERR("Failed to create buffer view.\n"); return; @@ -5337,16 +5398,17 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID view_desc.layer_idx = view->info.texture.layer_idx; view_desc.layer_count = view->info.texture.layer_count; 
- if (!vkd3d_create_texture_view(device, resource_impl->u.vk_image, &view_desc, &uint_view)) + if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, + &uint_view)) { ERR("Failed to create image view.\n"); return; } } - view = uint_view; + descriptor = uint_view; } - d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); + d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); if (uint_view) vkd3d_view_decref(uint_view, device); @@ -5365,7 +5427,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); resource_impl = unsafe_impl_from_ID3D12Resource(resource); - view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view_info.view; + if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) + return; memcpy(colour.float32, values, sizeof(colour.float32)); d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); @@ -5906,6 +5969,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d list->update_descriptors = device->use_vk_heaps ? 
d3d12_command_list_update_heap_descriptors : d3d12_command_list_update_descriptors; + list->descriptor_heap_count = 0; if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) { @@ -6199,6 +6263,8 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm return; } + command_list_flush_vk_heap_updates(cmd_list); + buffers[i] = cmd_list->vk_command_buffer; } diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index 39a5ca013c7..4263dcf4184 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -19,6 +19,8 @@ #include "vkd3d_private.h" #include "vkd3d_version.h" +#define VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE 256u + struct vkd3d_struct { enum vkd3d_structure_type type; @@ -2393,9 +2395,23 @@ static void vkd3d_time_domains_init(struct d3d12_device *device) WARN("Found no acceptable host time domain. Calibrated timestamps will not be available.\n"); } -static void vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, - const struct vkd3d_device_descriptor_limits *limits) +static void device_init_descriptor_pool_sizes(struct d3d12_device *device) { + const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; + VkDescriptorPoolSize *pool_sizes = device->vk_pool_sizes; + + if (device->use_vk_heaps) + { + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + pool_sizes[0].descriptorCount = min(limits->storage_image_max_descriptors, + VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); + pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; + device->vk_pool_count = 2; + return; + } + + assert(ARRAY_SIZE(device->vk_pool_sizes) >= 6); pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); @@ -2412,8 +2428,27 @@ static void 
vkd3d_init_descriptor_pool_sizes(VkDescriptorPoolSize *pool_sizes, pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER; pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); + device->vk_pool_count = 6; }; +static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) +{ + cache->head = NULL; + cache->size = size; +} + +static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache) +{ + union d3d12_desc_object u; + void *next; + + for (u.object = cache->head; u.object; u.object = next) + { + next = u.header->next; + vkd3d_free(u.object); + } +} + /* ID3D12Device */ static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) { @@ -2454,7 +2489,6 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) { struct d3d12_device *device = impl_from_ID3D12Device(iface); ULONG refcount = InterlockedDecrement(&device->refcount); - size_t i; TRACE("%p decreasing refcount to %u.\n", device, refcount); @@ -2474,8 +2508,8 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device); d3d12_device_destroy_pipeline_cache(device); d3d12_device_destroy_vkd3d_queues(device); - for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - vkd3d_mutex_destroy(&device->desc_mutex[i]); + vkd3d_desc_object_cache_cleanup(&device->view_desc_cache); + vkd3d_desc_object_cache_cleanup(&device->cbuffer_desc_cache); VK_CALL(vkDestroyDevice(device->vk_device, NULL)); if (device->parent) IUnknown_Release(device->parent); @@ -3368,132 +3402,6 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } -static void flush_desc_writes(struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], - struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap 
*descriptor_heap, struct d3d12_device *device) -{ - enum vkd3d_vk_descriptor_set_index set; - for (set = 0; set < VKD3D_SET_INDEX_COUNT; ++set) - { - if (!infos[set].count) - continue; - d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); - infos[set].count = 0; - infos[set].uav_counter = false; - } -} - -static void d3d12_desc_buffered_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_desc_copy_location locations[][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE], - struct d3d12_desc_copy_info *infos, struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) -{ - struct d3d12_desc_copy_location *location; - enum vkd3d_vk_descriptor_set_index set; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, src); - vkd3d_mutex_lock(mutex); - - if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_FREE) - { - /* Source must be unlocked first, and therefore can't be used as a null source. */ - static const struct d3d12_desc null = {0}; - vkd3d_mutex_unlock(mutex); - d3d12_desc_write_atomic(dst, &null, device); - return; - } - - set = vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(src->s.vk_descriptor_type); - location = &locations[set][infos[set].count++]; - - location->src.s = src->s; - - if (location->src.s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - vkd3d_view_incref(location->src.s.u.view_info.view); - - vkd3d_mutex_unlock(mutex); - - infos[set].uav_counter |= (location->src.s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV) - && !!location->src.s.u.view_info.view->vk_counter_view; - location->dst = dst; - - if (infos[set].count == ARRAY_SIZE(locations[0])) - { - d3d12_desc_copy_vk_heap_range(locations[set], &infos[set], descriptor_heap, set, device); - infos[set].count = 0; - infos[set].uav_counter = false; - } -} - -/* Some games, e.g. Control, copy a large number of descriptors per frame, so the - * speed of this function is critical. 
*/ -static void d3d12_device_vk_heaps_copy_descriptors(struct d3d12_device *device, - UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, - const UINT *dst_descriptor_range_sizes, - UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, - const UINT *src_descriptor_range_sizes) -{ - struct d3d12_desc_copy_location locations[VKD3D_SET_INDEX_COUNT][VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; - unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; - /* The locations array is relatively large, and often mostly empty. Keeping these - * values together in a separate array will likely result in fewer cache misses. */ - struct d3d12_desc_copy_info infos[VKD3D_SET_INDEX_COUNT]; - struct d3d12_descriptor_heap *descriptor_heap = NULL; - const struct d3d12_desc *src, *heap_base, *heap_end; - unsigned int dst_range_size, src_range_size; - struct d3d12_desc *dst; - - descriptor_heap = d3d12_desc_get_descriptor_heap(d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[0])); - heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; - heap_end = heap_base + descriptor_heap->desc.NumDescriptors; - - memset(infos, 0, sizeof(infos)); - dst_range_idx = dst_idx = 0; - src_range_idx = src_idx = 0; - while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) - { - dst_range_size = dst_descriptor_range_sizes ? dst_descriptor_range_sizes[dst_range_idx] : 1; - src_range_size = src_descriptor_range_sizes ? 
src_descriptor_range_sizes[src_range_idx] : 1; - - dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); - src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); - - if (dst < heap_base || dst >= heap_end) - { - flush_desc_writes(locations, infos, descriptor_heap, device); - descriptor_heap = d3d12_desc_get_descriptor_heap(dst); - heap_base = (const struct d3d12_desc *)descriptor_heap->descriptors; - heap_end = heap_base + descriptor_heap->desc.NumDescriptors; - } - - for (; dst_idx < dst_range_size && src_idx < src_range_size; src_idx++, dst_idx++) - { - /* We don't need to lock either descriptor for the identity check. The descriptor - * mutex is only intended to prevent use-after-free of the vkd3d_view caused by a - * race condition in the calling app. It is unnecessary to protect this test as it's - * the app's race condition, not ours. */ - if (dst[dst_idx].s.magic == src[src_idx].s.magic && (dst[dst_idx].s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - && dst[dst_idx].s.u.view_info.written_serial_id == src[src_idx].s.u.view_info.view->serial_id) - continue; - d3d12_desc_buffered_copy_atomic(&dst[dst_idx], &src[src_idx], locations, infos, descriptor_heap, device); - } - - if (dst_idx >= dst_range_size) - { - ++dst_range_idx; - dst_idx = 0; - } - if (src_idx >= src_range_size) - { - ++src_range_idx; - src_idx = 0; - } - } - - flush_desc_writes(locations, infos, descriptor_heap, device); -} - -#define VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT 8 - static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, const UINT *dst_descriptor_range_sizes, @@ -3525,15 +3433,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, if (!dst_descriptor_range_count) return; - if (device->use_vk_heaps && (dst_descriptor_range_count > 1 || (dst_descriptor_range_sizes - && 
dst_descriptor_range_sizes[0] >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT))) - { - d3d12_device_vk_heaps_copy_descriptors(device, dst_descriptor_range_count, dst_descriptor_range_offsets, - dst_descriptor_range_sizes, src_descriptor_range_count, src_descriptor_range_offsets, - src_descriptor_range_sizes); - return; - } - dst_range_idx = dst_idx = 0; src_range_idx = src_idx = 0; while (dst_range_idx < dst_descriptor_range_count && src_range_idx < src_descriptor_range_count) @@ -3544,8 +3443,12 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, dst = d3d12_desc_from_cpu_handle(dst_descriptor_range_offsets[dst_range_idx]); src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]); - while (dst_idx < dst_range_size && src_idx < src_range_size) - d3d12_desc_copy(&dst[dst_idx++], &src[src_idx++], device); + for (; dst_idx < dst_range_size && src_idx < src_range_size; ++dst_idx, ++src_idx) + { + if (dst[dst_idx].s.u.object == src[src_idx].s.u.object) + continue; + d3d12_desc_copy(&dst[dst_idx], &src[src_idx], device); + } if (dst_idx >= dst_range_size) { @@ -3570,17 +3473,6 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device *i iface, descriptor_count, dst_descriptor_range_offset.ptr, src_descriptor_range_offset.ptr, descriptor_heap_type); - if (descriptor_count >= VKD3D_DESCRIPTOR_OPTIMISED_COPY_MIN_COUNT) - { - struct d3d12_device *device = impl_from_ID3D12Device(iface); - if (device->use_vk_heaps) - { - d3d12_device_vk_heaps_copy_descriptors(device, 1, &dst_descriptor_range_offset, - &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count); - return; - } - } - d3d12_device_CopyDescriptors(iface, 1, &dst_descriptor_range_offset, &descriptor_count, 1, &src_descriptor_range_offset, &descriptor_count, descriptor_heap_type); } @@ -4080,7 +3972,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, { const struct vkd3d_vk_device_procs *vk_procs; HRESULT hr; - size_t i; 
device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; device->refcount = 1; @@ -4123,10 +4014,10 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, device->blocked_queue_count = 0; vkd3d_mutex_init(&device->blocked_queues_mutex); - for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - vkd3d_mutex_init(&device->desc_mutex[i]); + vkd3d_desc_object_cache_init(&device->view_desc_cache, sizeof(struct vkd3d_view)); + vkd3d_desc_object_cache_init(&device->cbuffer_desc_cache, sizeof(struct vkd3d_cbuffer_desc)); - vkd3d_init_descriptor_pool_sizes(device->vk_pool_sizes, &device->vk_info.descriptor_limits); + device_init_descriptor_pool_sizes(device); if ((device->parent = create_info->parent)) IUnknown_AddRef(device->parent); diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c index 8c050cfeb32..ea7b6859cc1 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -326,6 +326,9 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap) vkd3d_private_store_destroy(&heap->private_store); + if (heap->map_ptr) + VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory)); + VK_CALL(vkFreeMemory(device->vk_device, heap->vk_memory, NULL)); vkd3d_mutex_destroy(&heap->mutex); @@ -346,12 +349,19 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_Release(ID3D12Heap *iface) TRACE("%p decreasing refcount to %u.\n", heap, refcount); - if (!refcount) + /* A heap must not be destroyed until all contained resources are destroyed. 
*/ + if (!refcount && !heap->resource_count) d3d12_heap_destroy(heap); return refcount; } +static void d3d12_heap_resource_destroyed(struct d3d12_heap *heap) +{ + if (!InterlockedDecrement(&heap->resource_count) && (!heap->refcount || heap->is_private)) + d3d12_heap_destroy(heap); +} + static HRESULT STDMETHODCALLTYPE d3d12_heap_GetPrivateData(ID3D12Heap *iface, REFGUID guid, UINT *data_size, void *data) { @@ -437,97 +447,6 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) return impl_from_ID3D12Heap(iface); } -static HRESULT d3d12_heap_map(struct d3d12_heap *heap, uint64_t offset, - struct d3d12_resource *resource, void **data) -{ - struct d3d12_device *device = heap->device; - HRESULT hr = S_OK; - VkResult vr; - - vkd3d_mutex_lock(&heap->mutex); - - assert(!resource->map_count || heap->map_ptr); - - if (!resource->map_count) - { - if (!heap->map_ptr) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - - TRACE("Mapping heap %p.\n", heap); - - assert(!heap->map_count); - - if ((vr = VK_CALL(vkMapMemory(device->vk_device, heap->vk_memory, - 0, VK_WHOLE_SIZE, 0, &heap->map_ptr))) < 0) - { - WARN("Failed to map device memory, vr %d.\n", vr); - heap->map_ptr = NULL; - } - - hr = hresult_from_vk_result(vr); - } - - if (heap->map_ptr) - ++heap->map_count; - } - - if (hr == S_OK) - { - assert(heap->map_ptr); - if (data) - *data = (BYTE *)heap->map_ptr + offset; - ++resource->map_count; - } - else - { - assert(!heap->map_ptr); - if (data) - *data = NULL; - } - - vkd3d_mutex_unlock(&heap->mutex); - - return hr; -} - -static void d3d12_heap_unmap(struct d3d12_heap *heap, struct d3d12_resource *resource) -{ - struct d3d12_device *device = heap->device; - - vkd3d_mutex_lock(&heap->mutex); - - if (!resource->map_count) - { - WARN("Resource %p is not mapped.\n", resource); - goto done; - } - - --resource->map_count; - if (resource->map_count) - goto done; - - if (!heap->map_count) - { - ERR("Heap %p is not mapped.\n", heap); - goto done; 
- } - - --heap->map_count; - if (!heap->map_count) - { - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - - TRACE("Unmapping heap %p, ptr %p.\n", heap, heap->map_ptr); - - VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory)); - heap->map_ptr = NULL; - } - -done: - vkd3d_mutex_unlock(&heap->mutex); -} - static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) { if (!resource && !desc->SizeInBytes) @@ -552,15 +471,23 @@ static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d1 return S_OK; } +static VkMemoryPropertyFlags d3d12_heap_get_memory_property_flags(const struct d3d12_heap *heap) +{ + return heap->device->memory_properties.memoryTypes[heap->vk_memory_type].propertyFlags; +} + static HRESULT d3d12_heap_init(struct d3d12_heap *heap, struct d3d12_device *device, const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) { + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkMemoryRequirements memory_requirements; VkDeviceSize vk_memory_size; + VkResult vr; HRESULT hr; heap->ID3D12Heap_iface.lpVtbl = &d3d12_heap_vtbl; heap->refcount = 1; + heap->resource_count = 0; heap->is_private = !!resource; @@ -628,6 +555,20 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, heap->device = device; if (!heap->is_private) d3d12_device_add_ref(heap->device); + else + heap->resource_count = 1; + + if (d3d12_heap_get_memory_property_flags(heap) & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + { + if ((vr = VK_CALL(vkMapMemory(device->vk_device, + heap->vk_memory, 0, VK_WHOLE_SIZE, 0, &heap->map_ptr))) < 0) + { + heap->map_ptr = NULL; + ERR("Failed to map memory, vr %d.\n", vr); + d3d12_heap_destroy(heap); + return hresult_from_vk_result(vr); + } + } return S_OK; } @@ -1027,8 +968,8 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12 else VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL)); - if (resource->flags & 
VKD3D_RESOURCE_DEDICATED_HEAP) - d3d12_heap_destroy(resource->heap); + if (resource->heap) + d3d12_heap_resource_destroyed(resource->heap); } static ULONG d3d12_resource_incref(struct d3d12_resource *resource) @@ -1223,12 +1164,55 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_GetDevice(ID3D12Resource *iface, return d3d12_device_query_interface(resource->device, iid, device); } +static void *d3d12_resource_get_map_ptr(struct d3d12_resource *resource) +{ + assert(resource->heap->map_ptr); + return (uint8_t *)resource->heap->map_ptr + resource->heap_offset; +} + +static void d3d12_resource_get_vk_range(struct d3d12_resource *resource, + uint64_t offset, uint64_t size, VkMappedMemoryRange *vk_range) +{ + vk_range->sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + vk_range->pNext = NULL; + vk_range->memory = resource->heap->vk_memory; + vk_range->offset = resource->heap_offset + offset; + vk_range->size = size; +} + +static void d3d12_resource_invalidate(struct d3d12_resource *resource, uint64_t offset, uint64_t size) +{ + const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs; + VkMappedMemoryRange vk_range; + VkResult vr; + + if (d3d12_heap_get_memory_property_flags(resource->heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + return; + + d3d12_resource_get_vk_range(resource, offset, size, &vk_range); + if ((vr = VK_CALL(vkInvalidateMappedMemoryRanges(resource->device->vk_device, 1, &vk_range))) < 0) + ERR("Failed to invalidate memory, vr %d.\n", vr); +} + +static void d3d12_resource_flush(struct d3d12_resource *resource, uint64_t offset, uint64_t size) +{ + const struct vkd3d_vk_device_procs *vk_procs = &resource->device->vk_procs; + VkMappedMemoryRange vk_range; + VkResult vr; + + if (d3d12_heap_get_memory_property_flags(resource->heap) & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + return; + + d3d12_resource_get_vk_range(resource, offset, size, &vk_range); + if ((vr = VK_CALL(vkFlushMappedMemoryRanges(resource->device->vk_device, 1, &vk_range))) < 0) 
+ ERR("Failed to flush memory, vr %d.\n", vr); +} + static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT sub_resource, const D3D12_RANGE *read_range, void **data) { struct d3d12_resource *resource = impl_from_ID3D12Resource(iface); unsigned int sub_resource_count; - HRESULT hr; TRACE("iface %p, sub_resource %u, read_range %p, data %p.\n", iface, sub_resource, read_range, data); @@ -1259,15 +1243,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT return E_NOTIMPL; } - WARN("Ignoring read range %p.\n", read_range); - - if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, data))) - WARN("Failed to map resource %p, hr %#x.\n", resource, hr); - if (data) + { + *data = d3d12_resource_get_map_ptr(resource); TRACE("Returning pointer %p.\n", *data); + } - return hr; + if (!read_range) + d3d12_resource_invalidate(resource, 0, resource->desc.Width); + else if (read_range->End > read_range->Begin) + d3d12_resource_invalidate(resource, read_range->Begin, read_range->End - read_range->Begin); + + return S_OK; } static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT sub_resource, @@ -1286,9 +1273,10 @@ static void STDMETHODCALLTYPE d3d12_resource_Unmap(ID3D12Resource *iface, UINT s return; } - WARN("Ignoring written range %p.\n", written_range); - - d3d12_heap_unmap(resource->heap, resource); + if (!written_range) + d3d12_resource_flush(resource, 0, resource->desc.Width); + else if (written_range->End > written_range->Begin) + d3d12_resource_flush(resource, written_range->Begin, written_range->End - written_range->Begin); } static D3D12_RESOURCE_DESC * STDMETHODCALLTYPE d3d12_resource_GetDesc(ID3D12Resource *iface, @@ -1320,10 +1308,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc VkImageSubresource vk_sub_resource; const struct vkd3d_format *format; VkSubresourceLayout vk_layout; + uint64_t dst_offset, dst_size; struct 
d3d12_device *device; uint8_t *dst_data; D3D12_BOX box; - HRESULT hr; TRACE("iface %p, src_data %p, src_row_pitch %u, src_slice_pitch %u, " "dst_sub_resource %u, dst_box %s.\n", @@ -1381,20 +1369,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_WriteToSubresource(ID3D12Resourc TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch); - if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, (void **)&dst_data))) - { - WARN("Failed to map resource %p, hr %#x.\n", resource, hr); - return hr; - } - - dst_data += vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + dst_data = d3d12_resource_get_map_ptr(resource); + dst_offset = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->left, dst_box->top, dst_box->front); + dst_size = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + vk_layout.depthPitch, dst_box->right, dst_box->bottom - 1, dst_box->back - 1) - dst_offset; vkd3d_format_copy_data(format, src_data, src_row_pitch, src_slice_pitch, - dst_data, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->right - dst_box->left, + dst_data + dst_offset, vk_layout.rowPitch, vk_layout.depthPitch, dst_box->right - dst_box->left, dst_box->bottom - dst_box->top, dst_box->back - dst_box->front); - d3d12_heap_unmap(resource->heap, resource); + d3d12_resource_flush(resource, dst_offset, dst_size); return S_OK; } @@ -1408,10 +1393,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour VkImageSubresource vk_sub_resource; const struct vkd3d_format *format; VkSubresourceLayout vk_layout; + uint64_t src_offset, src_size; struct d3d12_device *device; uint8_t *src_data; D3D12_BOX box; - HRESULT hr; TRACE("iface %p, dst_data %p, dst_row_pitch %u, dst_slice_pitch %u, " "src_sub_resource %u, src_box %s.\n", @@ 
-1469,21 +1454,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour TRACE("Offset %#"PRIx64", size %#"PRIx64", row pitch %#"PRIx64", depth pitch %#"PRIx64".\n", vk_layout.offset, vk_layout.size, vk_layout.rowPitch, vk_layout.depthPitch); - if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, (void **)&src_data))) - { - WARN("Failed to map resource %p, hr %#x.\n", resource, hr); - return hr; - } - - src_data += vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + src_data = d3d12_resource_get_map_ptr(resource); + src_offset = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, vk_layout.depthPitch, src_box->left, src_box->top, src_box->front); + src_size = vk_layout.offset + vkd3d_format_get_data_offset(format, vk_layout.rowPitch, + vk_layout.depthPitch, src_box->right, src_box->bottom - 1, src_box->back - 1) - src_offset; - vkd3d_format_copy_data(format, src_data, vk_layout.rowPitch, vk_layout.depthPitch, + d3d12_resource_invalidate(resource, src_offset, src_size); + + vkd3d_format_copy_data(format, src_data + src_offset, vk_layout.rowPitch, vk_layout.depthPitch, dst_data, dst_row_pitch, dst_slice_pitch, src_box->right - src_box->left, src_box->bottom - src_box->top, src_box->back - src_box->front); - d3d12_heap_unmap(resource->heap, resource); - return S_OK; } @@ -1941,6 +1923,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, { resource->heap = heap; resource->heap_offset = heap_offset; + InterlockedIncrement(&heap->resource_count); } else { @@ -2061,24 +2044,72 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource) return d3d12_resource_decref(impl_from_ID3D12Resource(resource)); } -/* CBVs, SRVs, UAVs */ -static struct vkd3d_view *vkd3d_view_create(enum vkd3d_view_type type) +/* Objects are cached so that vkd3d_view_incref() can safely check the refcount + * of an object freed by another thread. 
*/ +static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) { - struct vkd3d_view *view; + union d3d12_desc_object u; + void *next; - if ((view = vkd3d_malloc(sizeof(*view)))) + do { - view->refcount = 1; - view->type = type; - view->serial_id = InterlockedIncrement64(&object_global_serial_id); - view->vk_counter_view = VK_NULL_HANDLE; + u.object = cache->head; + if (!u.object) + return vkd3d_malloc(cache->size); + next = u.header->next; } - return view; + while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, u.object, next)); + + return u.object; +} + +static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object) +{ + union d3d12_desc_object u = {object}; + void *head; + + do + { + head = cache->head; + u.header->next = head; + } + while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, head, u.object)); +} + +static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device) +{ + struct vkd3d_cbuffer_desc *desc; + + if (!(desc = vkd3d_desc_object_cache_get(&device->cbuffer_desc_cache))) + return NULL; + + desc->h.magic = VKD3D_DESCRIPTOR_MAGIC_CBV; + desc->h.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + desc->h.refcount = 1; + + return desc; } -void vkd3d_view_incref(struct vkd3d_view *view) +static struct vkd3d_view *vkd3d_view_create(uint32_t magic, VkDescriptorType vk_descriptor_type, + enum vkd3d_view_type type, struct d3d12_device *device) { - InterlockedIncrement(&view->refcount); + struct vkd3d_view *view; + + assert(magic); + + if (!(view = vkd3d_desc_object_cache_get(&device->view_desc_cache))) + { + ERR("Failed to allocate descriptor object.\n"); + return NULL; + } + + view->h.magic = magic; + view->h.vk_descriptor_type = vk_descriptor_type; + view->h.refcount = 1; + view->v.type = type; + view->v.vk_counter_view = VK_NULL_HANDLE; + + return view; } static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *device) @@ -2087,313 +2118,299 
@@ static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *dev TRACE("Destroying view %p.\n", view); - switch (view->type) + switch (view->v.type) { case VKD3D_VIEW_TYPE_BUFFER: - VK_CALL(vkDestroyBufferView(device->vk_device, view->u.vk_buffer_view, NULL)); + VK_CALL(vkDestroyBufferView(device->vk_device, view->v.u.vk_buffer_view, NULL)); break; case VKD3D_VIEW_TYPE_IMAGE: - VK_CALL(vkDestroyImageView(device->vk_device, view->u.vk_image_view, NULL)); + VK_CALL(vkDestroyImageView(device->vk_device, view->v.u.vk_image_view, NULL)); break; case VKD3D_VIEW_TYPE_SAMPLER: - VK_CALL(vkDestroySampler(device->vk_device, view->u.vk_sampler, NULL)); + VK_CALL(vkDestroySampler(device->vk_device, view->v.u.vk_sampler, NULL)); break; default: - WARN("Unhandled view type %d.\n", view->type); + WARN("Unhandled view type %d.\n", view->v.type); } - if (view->vk_counter_view) - VK_CALL(vkDestroyBufferView(device->vk_device, view->vk_counter_view, NULL)); + if (view->v.vk_counter_view) + VK_CALL(vkDestroyBufferView(device->vk_device, view->v.vk_counter_view, NULL)); - vkd3d_free(view); + vkd3d_desc_object_cache_push(&device->view_desc_cache, view); } -void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) +void vkd3d_view_decref(void *view, struct d3d12_device *device) { - if (!InterlockedDecrement(&view->refcount)) - vkd3d_view_destroy(view, device); + union d3d12_desc_object u = {view}; + + if (vkd3d_atomic_decrement(&u.header->refcount)) + return; + + if (u.header->magic != VKD3D_DESCRIPTOR_MAGIC_CBV) + vkd3d_view_destroy(u.view, device); + else + vkd3d_desc_object_cache_push(&device->cbuffer_desc_cache, u.object); } -/* TODO: write null descriptors to all applicable sets (invalid behaviour workaround). 
*/ -static void d3d12_descriptor_heap_write_vk_descriptor_range(struct d3d12_descriptor_heap_vk_set *descriptor_set, - struct d3d12_desc_copy_location *locations, unsigned int write_count) +static inline void d3d12_desc_replace(struct d3d12_desc *dst, void *view, struct d3d12_device *device) { - unsigned int i, info_index = 0, write_index = 0; + if ((view = vkd3d_atomic_exchange_pointer(&dst->s.u.object, view))) + vkd3d_view_decref(view, device); +} - switch (locations[0].src.s.vk_descriptor_type) - { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pBufferInfo = &descriptor_set->vk_buffer_infos[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_buffer_infos[info_index] = locations[info_index].src.s.u.vk_cbv_info; - } - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_image_infos[info_index].imageView = locations[info_index].src.s.u.view_info.view->u.vk_image_view; - } - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - for (; write_index < write_count; ++write_index) - { - descriptor_set->vk_descriptor_writes[write_index].pTexelBufferView = &descriptor_set->vk_buffer_views[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_buffer_views[info_index] = locations[info_index].src.s.u.view_info.view->u.vk_buffer_view; - } - break; - case VK_DESCRIPTOR_TYPE_SAMPLER: - for (; write_index < write_count; ++write_index) - { - 
descriptor_set->vk_descriptor_writes[write_index].pImageInfo = &descriptor_set->vk_image_infos[info_index]; - for (i = 0; i < descriptor_set->vk_descriptor_writes[write_index].descriptorCount; ++i, ++info_index) - descriptor_set->vk_image_infos[info_index].sampler = locations[info_index].src.s.u.view_info.view->u.vk_sampler; - } - break; - default: - ERR("Unhandled descriptor type %#x.\n", locations[0].src.s.vk_descriptor_type); - break; - } +#define VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE 24 + +struct descriptor_writes +{ + VkDescriptorBufferInfo null_vk_cbv_info; + VkBufferView null_vk_buffer_view; + VkDescriptorImageInfo vk_image_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + VkWriteDescriptorSet vk_descriptor_writes[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + void *held_refs[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + unsigned int count; + unsigned int held_ref_count; +}; + +static void descriptor_writes_free_object_refs(struct descriptor_writes *writes, struct d3d12_device *device) +{ + unsigned int i; + for (i = 0; i < writes->held_ref_count; ++i) + vkd3d_view_decref(writes->held_refs[i], device); + writes->held_ref_count = 0; } static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_heap *descriptor_heap, - uint32_t dst_array_element, const struct d3d12_device *device) + uint32_t dst_array_element, struct descriptor_writes *writes, struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct d3d12_descriptor_heap_vk_set *descriptor_set; - VkBufferView vk_buffer_view = VK_NULL_HANDLE; - enum vkd3d_vk_descriptor_set_index i; - VkDescriptorBufferInfo vk_cbv_info; - - vk_cbv_info.buffer = VK_NULL_HANDLE; - vk_cbv_info.offset = 0; - vk_cbv_info.range = VK_WHOLE_SIZE; + enum vkd3d_vk_descriptor_set_index set; + unsigned int i = writes->count; /* Binding a shader with the wrong null descriptor type works in Windows. * To support that here we must write one to all applicable Vulkan sets. 
*/ - for (i = VKD3D_SET_INDEX_UNIFORM_BUFFER; i <= VKD3D_SET_INDEX_STORAGE_IMAGE; ++i) - { - descriptor_set = &descriptor_heap->vk_descriptor_sets[i]; - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst_array_element; - descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; - switch (i) + for (set = VKD3D_SET_INDEX_UNIFORM_BUFFER; set <= VKD3D_SET_INDEX_STORAGE_IMAGE; ++set) + { + descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes->vk_descriptor_writes[i].pNext = NULL; + writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; + writes->vk_descriptor_writes[i].dstBinding = 0; + writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; + writes->vk_descriptor_writes[i].descriptorCount = 1; + writes->vk_descriptor_writes[i].descriptorType = descriptor_set->vk_type; + switch (set) { case VKD3D_SET_INDEX_UNIFORM_BUFFER: - descriptor_set->vk_descriptor_writes[0].pBufferInfo = &vk_cbv_info; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = &writes->null_vk_cbv_info; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; break; case VKD3D_SET_INDEX_SAMPLED_IMAGE: case VKD3D_SET_INDEX_STORAGE_IMAGE: - descriptor_set->vk_image_infos[0].imageView = VK_NULL_HANDLE; + writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + writes->vk_image_infos[i].sampler = VK_NULL_HANDLE; + writes->vk_image_infos[i].imageView = VK_NULL_HANDLE; + writes->vk_image_infos[i].imageLayout = (set == VKD3D_SET_INDEX_STORAGE_IMAGE) + ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; break; case VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER: case VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER: - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &vk_buffer_view; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = &writes->null_vk_buffer_view; break; default: assert(false); break; } - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); + if (++i < ARRAY_SIZE(writes->vk_descriptor_writes) - 1) + continue; + VK_CALL(vkUpdateDescriptorSets(device->vk_device, i, writes->vk_descriptor_writes, 0, NULL)); + descriptor_writes_free_object_refs(writes, device); + i = 0; } + + writes->count = i; } -/* dst and src contain the same data unless another thread overwrites dst. The array index is - * calculated from dst, and src is thread safe. */ -static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) +static void d3d12_desc_write_vk_heap(struct d3d12_descriptor_heap *descriptor_heap, unsigned int dst_array_element, + struct descriptor_writes *writes, void *object, struct d3d12_device *device) { struct d3d12_descriptor_heap_vk_set *descriptor_set; - struct d3d12_descriptor_heap *descriptor_heap; const struct vkd3d_vk_device_procs *vk_procs; + union d3d12_desc_object u = {object}; + unsigned int i = writes->count; + VkDescriptorType type; bool is_null = false; - descriptor_heap = d3d12_desc_get_descriptor_heap(dst); - descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( - src->s.vk_descriptor_type)]; + type = u.header->vk_descriptor_type; + descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(type)]; vk_procs = &device->vk_procs; - 
vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); - - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; - descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; - switch (src->s.vk_descriptor_type) + writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes->vk_descriptor_writes[i].pNext = NULL; + writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; + writes->vk_descriptor_writes[i].dstBinding = 0; + writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; + writes->vk_descriptor_writes[i].descriptorCount = 1; + writes->vk_descriptor_writes[i].descriptorType = type; + switch (type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - descriptor_set->vk_descriptor_writes[0].pBufferInfo = &src->s.u.vk_cbv_info; - is_null = !src->s.u.vk_cbv_info.buffer; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = &u.cb_desc->vk_cbv_info; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + is_null = !u.cb_desc->vk_cbv_info.buffer; break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - is_null = !(descriptor_set->vk_image_infos[0].imageView = src->s.u.view_info.view->u.vk_image_view); + writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + writes->vk_image_infos[i].sampler = VK_NULL_HANDLE; + is_null = !(writes->vk_image_infos[i].imageView = u.view->v.u.vk_image_view); + writes->vk_image_infos[i].imageLayout = (type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) + ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->u.vk_buffer_view; - is_null = !src->s.u.view_info.view->u.vk_buffer_view; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = &u.view->v.u.vk_buffer_view; + is_null = !u.view->v.u.vk_buffer_view; break; case VK_DESCRIPTOR_TYPE_SAMPLER: - descriptor_set->vk_image_infos[0].sampler = src->s.u.view_info.view->u.vk_sampler; + writes->vk_descriptor_writes[i].pImageInfo = &writes->vk_image_infos[i]; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i].pTexelBufferView = NULL; + writes->vk_image_infos[i].sampler = u.view->v.u.vk_sampler; + writes->vk_image_infos[i].imageView = VK_NULL_HANDLE; + writes->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; break; default: - ERR("Unhandled descriptor type %#x.\n", src->s.vk_descriptor_type); + ERR("Unhandled descriptor type %#x.\n", type); break; } if (is_null && device->vk_info.EXT_robustness2) + return d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, dst_array_element, writes, device); + + ++i; + if (u.header->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && u.view->v.vk_counter_view) { - d3d12_desc_write_vk_heap_null_descriptor(descriptor_heap, - descriptor_set->vk_descriptor_writes[0].dstArrayElement, device); - vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); - return; + descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; + writes->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes->vk_descriptor_writes[i].pNext = NULL; + writes->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; + writes->vk_descriptor_writes[i].dstBinding = 0; + 
writes->vk_descriptor_writes[i].dstArrayElement = dst_array_element; + writes->vk_descriptor_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + writes->vk_descriptor_writes[i].descriptorCount = 1; + writes->vk_descriptor_writes[i].pImageInfo = NULL; + writes->vk_descriptor_writes[i].pBufferInfo = NULL; + writes->vk_descriptor_writes[i++].pTexelBufferView = &u.view->v.vk_counter_view; } - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); - - if (src->s.magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->s.u.view_info.view->vk_counter_view) + if (i >= ARRAY_SIZE(writes->vk_descriptor_writes) - 1) { - descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; - descriptor_set->vk_descriptor_writes[0].dstArrayElement = dst->index; - descriptor_set->vk_descriptor_writes[0].descriptorCount = 1; - descriptor_set->vk_descriptor_writes[0].pTexelBufferView = &src->s.u.view_info.view->vk_counter_view; - VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, descriptor_set->vk_descriptor_writes, 0, NULL)); + VK_CALL(vkUpdateDescriptorSets(device->vk_device, i, writes->vk_descriptor_writes, 0, NULL)); + descriptor_writes_free_object_refs(writes, device); + i = 0; } - vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); + writes->count = i; } -static void d3d12_desc_write_atomic_d3d12_only(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) +void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device) { - struct vkd3d_view *defunct_view; - struct vkd3d_mutex *mutex; - - mutex = d3d12_device_get_descriptor_mutex(device, dst); - vkd3d_mutex_lock(mutex); + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct d3d12_desc *descriptors, *src; + struct descriptor_writes writes; + union d3d12_desc_object u; + unsigned int i, next; - if (!(dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) || 
InterlockedDecrement(&dst->s.u.view_info.view->refcount)) - { - d3d12_desc_copy_raw(dst, src); - vkd3d_mutex_unlock(mutex); + if ((i = vkd3d_atomic_exchange(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) return; - } - defunct_view = dst->s.u.view_info.view; - d3d12_desc_copy_raw(dst, src); - vkd3d_mutex_unlock(mutex); + writes.null_vk_cbv_info.buffer = VK_NULL_HANDLE; + writes.null_vk_cbv_info.offset = 0; + writes.null_vk_cbv_info.range = VK_WHOLE_SIZE; + writes.null_vk_buffer_view = VK_NULL_HANDLE; + writes.count = 0; + writes.held_ref_count = 0; - /* Destroy the view after unlocking to reduce wait time. */ - vkd3d_view_destroy(defunct_view, device); -} - -void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, - struct d3d12_device *device) -{ - struct vkd3d_view *defunct_view = NULL; - struct vkd3d_mutex *mutex; + descriptors = (struct d3d12_desc *)descriptor_heap->descriptors; - mutex = d3d12_device_get_descriptor_mutex(device, dst); - vkd3d_mutex_lock(mutex); + for (; i != UINT_MAX; i = next) + { + src = &descriptors[i]; + next = (int)src->next >> 1; - /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ - if ((dst->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - && !InterlockedDecrement(&dst->s.u.view_info.view->refcount)) - defunct_view = dst->s.u.view_info.view; + u.object = d3d12_desc_get_object_ref(src, device); - d3d12_desc_copy_raw(dst, src); + if (!u.object) + { + vkd3d_atomic_exchange(&src->next, 0); + continue; + } - vkd3d_mutex_unlock(mutex); + writes.held_refs[writes.held_ref_count++] = u.object; + d3d12_desc_write_vk_heap(descriptor_heap, i, &writes, u.object, device); - /* Destroy the view after unlocking to reduce wait time. */ - if (defunct_view) - vkd3d_view_destroy(defunct_view, device); + vkd3d_atomic_exchange(&src->next, 0); + } - if (device->use_vk_heaps && dst->s.magic) - d3d12_desc_write_vk_heap(dst, src, device); + /* Avoid thunk calls wherever possible. 
*/ + if (writes.count) + VK_CALL(vkUpdateDescriptorSets(device->vk_device, writes.count, writes.vk_descriptor_writes, 0, NULL)); + descriptor_writes_free_object_refs(&writes, device); } -static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) +static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst) { - static const struct d3d12_desc null_desc = {0}; + struct d3d12_descriptor_heap *descriptor_heap; + unsigned int i, head; + + i = dst->index; + descriptor_heap = d3d12_desc_get_descriptor_heap(dst); + head = descriptor_heap->dirty_list_head; - d3d12_desc_write_atomic(descriptor, &null_desc, device); + /* Only one thread can swap the value away from zero. */ + if (!vkd3d_atomic_compare_exchange(&dst->next, 0, (head << 1) | 1)) + return; + /* Now it is safe to modify 'next' to another nonzero value if necessary. */ + while (!vkd3d_atomic_compare_exchange(&descriptor_heap->dirty_list_head, head, i)) + { + head = descriptor_heap->dirty_list_head; + vkd3d_atomic_exchange(&dst->next, (head << 1) | 1); + } } -void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, - struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, +void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { - struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - unsigned int i, write_count; - - vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); - - for (i = 0, write_count = 0; i < info->count; ++i) - { - d3d12_desc_write_atomic_d3d12_only(locations[i].dst, &locations[i].src, device); + void *object = src->s.u.object; - if (i && locations[i].dst == locations[i - 1].dst + 1) - { - ++descriptor_set->vk_descriptor_writes[write_count - 1].descriptorCount; - continue; - } - /* Accessing dst->index will be 
slow if a cache miss occurs, so calculate instead. */ - descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst - - (const struct d3d12_desc *)descriptor_heap->descriptors; - descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; - } - d3d12_descriptor_heap_write_vk_descriptor_range(descriptor_set, locations, write_count); - /* We could pass a VkCopyDescriptorSet array instead, but that would require also storing a src array index - * for each location, which means querying the src descriptor heap. Contiguous copies require contiguous src - * descriptors as well as dst, which is less likely to occur. And client race conditions may break it. */ - VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); - - if (!info->uav_counter) - goto done; - - descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; - - for (i = 0, write_count = 0; i < info->count; ++i) - { - if (!locations[i].src.s.u.view_info.view->vk_counter_view) - continue; - descriptor_set->vk_buffer_views[write_count] = locations[i].src.s.u.view_info.view->vk_counter_view; - descriptor_set->vk_descriptor_writes[write_count].pTexelBufferView = &descriptor_set->vk_buffer_views[write_count]; - /* Accessing dst->index will be slow if a cache miss occurs, so calculate instead. 
*/ - descriptor_set->vk_descriptor_writes[write_count].dstArrayElement = locations[i].dst - - (const struct d3d12_desc *)descriptor_heap->descriptors; - descriptor_set->vk_descriptor_writes[write_count++].descriptorCount = 1; - } - VK_CALL(vkUpdateDescriptorSets(device->vk_device, write_count, descriptor_set->vk_descriptor_writes, 0, NULL)); + d3d12_desc_replace(dst, object, device); + if (device->use_vk_heaps && object && !dst->next) + d3d12_desc_mark_as_modified(dst); +} -done: - vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); +static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) +{ + d3d12_desc_replace(descriptor, NULL, device); } void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { struct d3d12_desc tmp; - struct vkd3d_mutex *mutex; assert(dst != src); - /* Shadow of the Tomb Raider and possibly other titles sometimes destroy - * and rewrite a descriptor in another thread while it is being copied. 
*/ - mutex = d3d12_device_get_descriptor_mutex(device, src); - vkd3d_mutex_lock(mutex); - - if (src->s.magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - vkd3d_view_incref(src->s.u.view_info.view); - - d3d12_desc_copy_raw(&tmp, src); - - vkd3d_mutex_unlock(mutex); - + tmp.s.u.object = d3d12_desc_get_object_ref(src, device); d3d12_desc_write_atomic(dst, &tmp, device); } @@ -2455,8 +2472,9 @@ static bool vkd3d_create_vk_buffer_view(struct d3d12_device *device, return vr == VK_SUCCESS; } -bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, - VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view) +bool vkd3d_create_buffer_view(struct d3d12_device *device, uint32_t magic, VkBuffer vk_buffer, + const struct vkd3d_format *format, VkDeviceSize offset, VkDeviceSize size, + struct vkd3d_view **view) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkBufferView vk_view = VK_NULL_HANDLE; @@ -2465,16 +2483,18 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c if (vk_buffer && !vkd3d_create_vk_buffer_view(device, vk_buffer, format, offset, size, &vk_view)) return false; - if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_BUFFER))) + if (!(object = vkd3d_view_create(magic, magic == VKD3D_DESCRIPTOR_MAGIC_UAV + ? 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + VKD3D_VIEW_TYPE_BUFFER, device))) { VK_CALL(vkDestroyBufferView(device->vk_device, vk_view, NULL)); return false; } - object->u.vk_buffer_view = vk_view; - object->format = format; - object->info.buffer.offset = offset; - object->info.buffer.size = size; + object->v.u.vk_buffer_view = vk_view; + object->v.format = format; + object->v.info.buffer.offset = offset; + object->v.info.buffer.size = size; *view = object; return true; } @@ -2482,7 +2502,7 @@ bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, c #define VKD3D_VIEW_RAW_BUFFER 0x1 static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, - struct d3d12_resource *resource, DXGI_FORMAT view_format, + uint32_t magic, struct d3d12_resource *resource, DXGI_FORMAT view_format, unsigned int offset, unsigned int size, unsigned int structure_stride, unsigned int flags, struct vkd3d_view **view) { @@ -2513,7 +2533,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, assert(d3d12_resource_is_buffer(resource)); - return vkd3d_create_buffer_view(device, resource->u.vk_buffer, + return vkd3d_create_buffer_view(device, magic, resource->u.vk_buffer, format, offset * element_size, size * element_size, view); } @@ -2741,7 +2761,7 @@ static void vkd3d_texture_view_desc_normalise(struct vkd3d_texture_view_desc *de desc->layer_count = max_layer_count; } -bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, +bool vkd3d_create_texture_view(struct d3d12_device *device, uint32_t magic, VkImage vk_image, const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; @@ -2774,18 +2794,19 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, } } - if (!(object = vkd3d_view_create(VKD3D_VIEW_TYPE_IMAGE))) + if (!(object = vkd3d_view_create(magic, magic 
== VKD3D_DESCRIPTOR_MAGIC_UAV ? VK_DESCRIPTOR_TYPE_STORAGE_IMAGE + : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VKD3D_VIEW_TYPE_IMAGE, device))) { VK_CALL(vkDestroyImageView(device->vk_device, vk_view, NULL)); return false; } - object->u.vk_image_view = vk_view; - object->format = format; - object->info.texture.vk_view_type = desc->view_type; - object->info.texture.miplevel_idx = desc->miplevel_idx; - object->info.texture.layer_idx = desc->layer_idx; - object->info.texture.layer_count = desc->layer_count; + object->v.u.vk_image_view = vk_view; + object->v.format = format; + object->v.info.texture.vk_view_type = desc->view_type; + object->v.info.texture.miplevel_idx = desc->miplevel_idx; + object->v.info.texture.layer_idx = desc->layer_idx; + object->v.info.texture.layer_count = desc->layer_count; *view = object; return true; } @@ -2794,6 +2815,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc) { struct VkDescriptorBufferInfo *buffer_info; + struct vkd3d_cbuffer_desc *cb_desc; struct d3d12_resource *resource; if (!desc) @@ -2802,13 +2824,19 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, return; } + if (!(cb_desc = vkd3d_cbuffer_desc_create(device))) + { + ERR("Failed to allocate descriptor object.\n"); + return; + } + if (desc->SizeInBytes & (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1)) { WARN("Size is not %u bytes aligned.\n", D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); return; } - buffer_info = &descriptor->s.u.vk_cbv_info; + buffer_info = &cb_desc->vk_cbv_info; if (desc->BufferLocation) { resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, desc->BufferLocation); @@ -2824,8 +2852,7 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, buffer_info->range = VK_WHOLE_SIZE; } - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_CBV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor->s.u.cb_desc = cb_desc; } 
static unsigned int vkd3d_view_flags_from_d3d12_buffer_srv_flags(D3D12_BUFFER_SRV_FLAGS flags) @@ -2842,7 +2869,6 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, { struct vkd3d_null_resources *null_resources = &device->null_resources; struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; VkImage vk_image; if (!desc) @@ -2857,15 +2883,9 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, if (!device->vk_info.EXT_robustness2) WARN("Creating NULL buffer SRV %#x.\n", desc->Format); - if (vkd3d_create_buffer_view(device, null_resources->vk_buffer, + vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, null_resources->vk_buffer, vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), - 0, VKD3D_NULL_BUFFER_SIZE, &view)) - { - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; - } + 0, VKD3D_NULL_BUFFER_SIZE, &descriptor->s.u.view); return; case D3D12_SRV_DIMENSION_TEXTURE2D: @@ -2904,20 +2924,13 @@ static void vkd3d_create_null_srv(struct d3d12_desc *descriptor, vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_ZERO; vkd3d_desc.allowed_swizzle = true; - if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_SRV, vk_image, &vkd3d_desc, &descriptor->s.u.view); } static void vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) { - struct vkd3d_view *view; unsigned int flags; if (!desc) @@ -2933,15 +2946,9 @@ static void 
vkd3d_create_buffer_srv(struct d3d12_desc *descriptor, } flags = vkd3d_view_flags_from_d3d12_buffer_srv_flags(desc->u.Buffer.Flags); - if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, + vkd3d_create_buffer_view_for_resource(device, VKD3D_DESCRIPTOR_MAGIC_SRV, resource, desc->Format, desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, - desc->u.Buffer.StructureByteStride, flags, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + desc->u.Buffer.StructureByteStride, flags, &descriptor->s.u.view); } static VkImageAspectFlags vk_image_aspect_flags_from_d3d12_plane_slice(const struct vkd3d_format *format, @@ -2970,7 +2977,6 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc) { struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; if (!resource) { @@ -3002,6 +3008,11 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, switch (desc->ViewDimension) { + case D3D12_SRV_DIMENSION_TEXTURE1D: + vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_1D; + vkd3d_desc.miplevel_idx = desc->u.Texture1D.MostDetailedMip; + vkd3d_desc.miplevel_count = desc->u.Texture1D.MipLevels; + break; case D3D12_SRV_DIMENSION_TEXTURE2D: vkd3d_desc.view_type = VK_IMAGE_VIEW_TYPE_2D; vkd3d_desc.miplevel_idx = desc->u.Texture2D.MostDetailedMip; @@ -3066,13 +3077,8 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, } } - if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_SRV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view(device, 
VKD3D_DESCRIPTOR_MAGIC_SRV, resource->u.vk_image, &vkd3d_desc, + &descriptor->s.u.view); } static unsigned int vkd3d_view_flags_from_d3d12_buffer_uav_flags(D3D12_BUFFER_UAV_FLAGS flags) @@ -3089,7 +3095,6 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, { struct vkd3d_null_resources *null_resources = &device->null_resources; struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; VkImage vk_image; if (!desc) @@ -3104,15 +3109,9 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, if (!device->vk_info.EXT_robustness2) WARN("Creating NULL buffer UAV %#x.\n", desc->Format); - if (vkd3d_create_buffer_view(device, null_resources->vk_storage_buffer, + vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, null_resources->vk_storage_buffer, vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false), - 0, VKD3D_NULL_BUFFER_SIZE, &view)) - { - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; - } + 0, VKD3D_NULL_BUFFER_SIZE, &descriptor->s.u.view); return; case D3D12_UAV_DIMENSION_TEXTURE2D: @@ -3150,13 +3149,7 @@ static void vkd3d_create_null_uav(struct d3d12_desc *descriptor, vkd3d_desc.components.a = VK_COMPONENT_SWIZZLE_A; vkd3d_desc.allowed_swizzle = false; - if (!vkd3d_create_texture_view(device, vk_image, &vkd3d_desc, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, vk_image, &vkd3d_desc, &descriptor->s.u.view); } static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, @@ -3179,16 +3172,11 @@ static void vkd3d_create_buffer_uav(struct 
d3d12_desc *descriptor, struct d3d12_ } flags = vkd3d_view_flags_from_d3d12_buffer_uav_flags(desc->u.Buffer.Flags); - if (!vkd3d_create_buffer_view_for_resource(device, resource, desc->Format, + if (!vkd3d_create_buffer_view_for_resource(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource, desc->Format, desc->u.Buffer.FirstElement, desc->u.Buffer.NumElements, desc->u.Buffer.StructureByteStride, flags, &view)) return; - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; - if (counter_resource) { const struct vkd3d_format *format; @@ -3198,13 +3186,16 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format, - desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->vk_counter_view)) + desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->v.vk_counter_view)) { WARN("Failed to create counter buffer view.\n"); - view->vk_counter_view = VK_NULL_HANDLE; - d3d12_desc_destroy(descriptor, device); + view->v.vk_counter_view = VK_NULL_HANDLE; + vkd3d_view_decref(view, device); + return; } } + + descriptor->s.u.view = view; } static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, @@ -3212,7 +3203,6 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) { struct vkd3d_texture_view_desc vkd3d_desc; - struct vkd3d_view *view; if (!init_default_texture_view_desc(&vkd3d_desc, resource, desc ? 
desc->Format : 0)) return; @@ -3227,6 +3217,9 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, { switch (desc->ViewDimension) { + case D3D12_UAV_DIMENSION_TEXTURE1D: + vkd3d_desc.miplevel_idx = desc->u.Texture1D.MipSlice; + break; case D3D12_UAV_DIMENSION_TEXTURE2D: vkd3d_desc.miplevel_idx = desc->u.Texture2D.MipSlice; if (desc->u.Texture2D.PlaneSlice) @@ -3257,13 +3250,8 @@ static void vkd3d_create_texture_uav(struct d3d12_desc *descriptor, } } - if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) - return; - - descriptor->s.magic = VKD3D_DESCRIPTOR_MAGIC_UAV; - descriptor->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - descriptor->s.u.view_info.view = view; - descriptor->s.u.view_info.written_serial_id = view->serial_id; + vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_image, &vkd3d_desc, + &descriptor->s.u.view); } void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *device, @@ -3291,12 +3279,26 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d } bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, - D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view) + D3D12_GPU_VIRTUAL_ADDRESS gpu_address, D3D12_ROOT_PARAMETER_TYPE parameter_type, VkBufferView *vk_buffer_view) { const struct vkd3d_format *format; struct d3d12_resource *resource; format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); + + if (!gpu_address) + { + if (device->vk_info.EXT_robustness2) + { + *vk_buffer_view = VK_NULL_HANDLE; + return true; + } + WARN("Creating null buffer view.\n"); + return vkd3d_create_vk_buffer_view(device, parameter_type == D3D12_ROOT_PARAMETER_TYPE_UAV + ? 
device->null_resources.vk_storage_buffer : device->null_resources.vk_buffer, + format, 0, VK_WHOLE_SIZE, vk_buffer_view); + } + resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address); assert(d3d12_resource_is_buffer(resource)); return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format, @@ -3412,21 +3414,21 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, FIXME("Ignoring border color {%.8e, %.8e, %.8e, %.8e}.\n", desc->BorderColor[0], desc->BorderColor[1], desc->BorderColor[2], desc->BorderColor[3]); - if (!(view = vkd3d_view_create(VKD3D_VIEW_TYPE_SAMPLER))) + if (!(view = vkd3d_view_create(VKD3D_DESCRIPTOR_MAGIC_SAMPLER, VK_DESCRIPTOR_TYPE_SAMPLER, + VKD3D_VIEW_TYPE_SAMPLER, device))) return; + view->v.u.vk_sampler = VK_NULL_HANDLE; + view->v.format = NULL; if (d3d12_create_sampler(device, desc->Filter, desc->AddressU, desc->AddressV, desc->AddressW, desc->MipLODBias, desc->MaxAnisotropy, - desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->u.vk_sampler) < 0) + desc->ComparisonFunc, desc->MinLOD, desc->MaxLOD, &view->v.u.vk_sampler) < 0) { - vkd3d_free(view); + vkd3d_view_decref(view, device); return; } - sampler->s.magic = VKD3D_DESCRIPTOR_MAGIC_SAMPLER; - sampler->s.vk_descriptor_type = VK_DESCRIPTOR_TYPE_SAMPLER; - sampler->s.u.view_info.view = view; - sampler->s.u.view_info.written_serial_id = view->serial_id; + sampler->s.u.view = view; } HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, @@ -3448,7 +3450,7 @@ HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, /* RTVs */ static void d3d12_rtv_desc_destroy(struct d3d12_rtv_desc *rtv, struct d3d12_device *device) { - if (rtv->magic != VKD3D_DESCRIPTOR_MAGIC_RTV) + if (!rtv->view) return; vkd3d_view_decref(rtv->view, device); @@ -3527,10 +3529,9 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev assert(d3d12_resource_is_texture(resource)); - if (!vkd3d_create_texture_view(device, 
resource->u.vk_image, &vkd3d_desc, &view)) + if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_RTV, resource->u.vk_image, &vkd3d_desc, &view)) return; - rtv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_RTV; rtv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); rtv_desc->format = vkd3d_desc.format; rtv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); @@ -3543,7 +3544,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev /* DSVs */ static void d3d12_dsv_desc_destroy(struct d3d12_dsv_desc *dsv, struct d3d12_device *device) { - if (dsv->magic != VKD3D_DESCRIPTOR_MAGIC_DSV) + if (!dsv->view) return; vkd3d_view_decref(dsv->view, device); @@ -3612,10 +3613,9 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev assert(d3d12_resource_is_texture(resource)); - if (!vkd3d_create_texture_view(device, resource->u.vk_image, &vkd3d_desc, &view)) + if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_DSV, resource->u.vk_image, &vkd3d_desc, &view)) return; - dsv_desc->magic = VKD3D_DESCRIPTOR_MAGIC_DSV; dsv_desc->sample_count = vk_samples_from_dxgi_sample_desc(&resource->desc.SampleDesc); dsv_desc->format = vkd3d_desc.format; dsv_desc->width = d3d12_resource_desc_get_width(&resource->desc, vkd3d_desc.miplevel_idx); @@ -3883,7 +3883,6 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; VkDescriptorSetAllocateInfo set_desc; - unsigned int i; VkResult vr; set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; @@ -3897,8 +3896,7 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript set_size.pDescriptorCounts = &variable_binding_size; if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0) { - 
for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i) - descriptor_set->vk_descriptor_writes[i].dstSet = descriptor_set->vk_set; + descriptor_set->vk_type = device->vk_descriptor_heap_layouts[set].type; return S_OK; } @@ -3914,7 +3912,6 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets)); - vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) @@ -3925,53 +3922,6 @@ static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descri for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set) { - struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_descriptor_writes); ++i) - { - descriptor_set->vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - descriptor_set->vk_descriptor_writes[i].pNext = NULL; - descriptor_set->vk_descriptor_writes[i].dstBinding = 0; - descriptor_set->vk_descriptor_writes[i].descriptorType = device->vk_descriptor_heap_layouts[set].type; - descriptor_set->vk_descriptor_writes[i].pImageInfo = NULL; - descriptor_set->vk_descriptor_writes[i].pBufferInfo = NULL; - descriptor_set->vk_descriptor_writes[i].pTexelBufferView = NULL; - } - switch (device->vk_descriptor_heap_layouts[set].type) - { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; - for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) - { - 
descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE; - descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } - break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; - for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) - { - descriptor_set->vk_image_infos[i].sampler = VK_NULL_HANDLE; - descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - break; - case VK_DESCRIPTOR_TYPE_SAMPLER: - descriptor_set->vk_descriptor_writes[0].pImageInfo = &descriptor_set->vk_image_infos[0]; - for (i = 0; i < ARRAY_SIZE(descriptor_set->vk_image_infos); ++i) - { - descriptor_set->vk_image_infos[i].imageView = VK_NULL_HANDLE; - descriptor_set->vk_image_infos[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; - } - break; - default: - ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type); - return E_FAIL; - } if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, set))) return hr; @@ -3995,6 +3945,7 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript return hr; d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); + vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); d3d12_device_add_ref(descriptor_heap->device = device); @@ -4047,7 +3998,9 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, { memset(&dst[i].s, 0, sizeof(dst[i].s)); dst[i].index = i; + dst[i].next = 0; } + object->dirty_list_head = UINT_MAX; } else { diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index c964ea8fe3a..5e46b467252 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -1958,7 +1958,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device, const struct vkd3d_shader_compile_option 
options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_8}, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0}, }; @@ -2011,7 +2011,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER const struct vkd3d_shader_compile_option options[] = { - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_7}, + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_8}, {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, }; diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index 77b795d6278..b0150754434 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -31,8 +31,8 @@ #include "vkd3d_blob.h" #include "vkd3d_memory.h" #include "vkd3d_utf8.h" -#include "wine/list.h" -#include "wine/rbtree.h" +#include "list.h" +#include "rbtree.h" #include "vkd3d.h" #include "vkd3d_shader.h" @@ -44,13 +44,11 @@ #define VK_CALL(f) (vk_procs->f) -#define VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW 0x01000000u - #define VKD3D_DESCRIPTOR_MAGIC_FREE 0x00000000u #define VKD3D_DESCRIPTOR_MAGIC_CBV VKD3D_MAKE_TAG('C', 'B', 'V', 0) -#define VKD3D_DESCRIPTOR_MAGIC_SRV VKD3D_MAKE_TAG('S', 'R', 'V', 1) -#define VKD3D_DESCRIPTOR_MAGIC_UAV VKD3D_MAKE_TAG('U', 'A', 'V', 1) -#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER VKD3D_MAKE_TAG('S', 'M', 'P', 1) +#define VKD3D_DESCRIPTOR_MAGIC_SRV VKD3D_MAKE_TAG('S', 'R', 'V', 0) +#define VKD3D_DESCRIPTOR_MAGIC_UAV VKD3D_MAKE_TAG('U', 'A', 'V', 0) +#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER VKD3D_MAKE_TAG('S', 'M', 'P', 0) #define VKD3D_DESCRIPTOR_MAGIC_DSV VKD3D_MAKE_TAG('D', 'S', 'V', 0) #define VKD3D_DESCRIPTOR_MAGIC_RTV VKD3D_MAKE_TAG('R', 'T', 'V', 0) @@ -252,6 +250,31 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) 
{ } +static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) +{ + return InterlockedDecrement((LONG volatile *)x); +} + +static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) +{ + return InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp; +} + +static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) +{ + return InterlockedExchange((LONG volatile *)x, val); +} + +static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) +{ + return InterlockedCompareExchangePointer(x, xchg, cmp) == cmp; +} + +static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) +{ + return InterlockedExchangePointer(x, val); +} + #else /* _WIN32 */ #include @@ -354,6 +377,63 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond) ERR("Could not destroy the condition variable, error %d.\n", ret); } +# if HAVE_SYNC_SUB_AND_FETCH +static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x) +{ + return __sync_sub_and_fetch(x, 1); +} +# else +# error "vkd3d_atomic_decrement() not implemented for this platform" +# endif /* HAVE_SYNC_SUB_AND_FETCH */ + +# if HAVE_SYNC_BOOL_COMPARE_AND_SWAP +static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) +{ + return __sync_bool_compare_and_swap(x, cmp, xchg); +} + +static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) +{ + return __sync_bool_compare_and_swap(x, cmp, xchg); +} +# else +# error "vkd3d_atomic_compare_exchange() not implemented for this platform" +# endif + +# if HAVE_ATOMIC_EXCHANGE_N +static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) +{ + return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); +} + +static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) +{ +
return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); +} +# elif HAVE_SYNC_BOOL_COMPARE_AND_SWAP +static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) +{ + unsigned int i; + do + { + i = *x; + } while (!__sync_bool_compare_and_swap(x, i, val)); + return i; +} + +static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) +{ + void *p; + do + { + p = *x; + } while (!__sync_bool_compare_and_swap(x, p, val)); + return p; +} +# else +# error "vkd3d_atomic_exchange() not implemented for this platform" +# endif + #endif /* _WIN32 */ HRESULT vkd3d_create_thread(struct vkd3d_instance *instance, @@ -563,6 +643,7 @@ struct d3d12_heap { ID3D12Heap ID3D12Heap_iface; LONG refcount; + LONG resource_count; bool is_private; D3D12_HEAP_DESC desc; @@ -661,11 +742,9 @@ enum vkd3d_view_type VKD3D_VIEW_TYPE_SAMPLER, }; -struct vkd3d_view +struct vkd3d_resource_view { - LONG refcount; enum vkd3d_view_type type; - uint64_t serial_id; union { VkBufferView vk_buffer_view; @@ -691,9 +770,6 @@ struct vkd3d_view } info; }; -void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device); -void vkd3d_view_incref(struct vkd3d_view *view); - struct vkd3d_texture_view_desc { VkImageViewType view_type; @@ -707,32 +783,88 @@ struct vkd3d_texture_view_desc bool allowed_swizzle; }; -bool vkd3d_create_buffer_view(struct d3d12_device *device, VkBuffer vk_buffer, const struct vkd3d_format *format, - VkDeviceSize offset, VkDeviceSize size, struct vkd3d_view **view); -bool vkd3d_create_texture_view(struct d3d12_device *device, VkImage vk_image, +struct vkd3d_desc_header +{ + uint32_t magic; + unsigned int volatile refcount; + void *next; + VkDescriptorType vk_descriptor_type; +}; + +struct vkd3d_view +{ + struct vkd3d_desc_header h; + struct vkd3d_resource_view v; +}; + +bool vkd3d_create_buffer_view(struct d3d12_device *device, uint32_t magic, VkBuffer vk_buffer, + const struct vkd3d_format *format, VkDeviceSize offset, 
VkDeviceSize size, struct vkd3d_view **view); +bool vkd3d_create_texture_view(struct d3d12_device *device, uint32_t magic, VkImage vk_image, const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view); -struct vkd3d_view_info +struct vkd3d_cbuffer_desc { - uint64_t written_serial_id; - struct vkd3d_view *view; + struct vkd3d_desc_header h; + VkDescriptorBufferInfo vk_cbv_info; }; struct d3d12_desc { struct { - uint32_t magic; - VkDescriptorType vk_descriptor_type; - union + union d3d12_desc_object { - VkDescriptorBufferInfo vk_cbv_info; - struct vkd3d_view_info view_info; + struct vkd3d_desc_header *header; + struct vkd3d_view *view; + struct vkd3d_cbuffer_desc *cb_desc; + void *object; } u; } s; unsigned int index; + unsigned int next; }; +void vkd3d_view_decref(void *view, struct d3d12_device *device); + +static inline bool vkd3d_view_incref(void *desc) +{ + struct vkd3d_desc_header *h = desc; + unsigned int refcount; + + do + { + refcount = h->refcount; + /* Avoid incrementing a freed object. Reading the value is safe because objects are recycled. */ + if (refcount <= 0) + return false; + } + while (!vkd3d_atomic_compare_exchange(&h->refcount, refcount, refcount + 1)); + + return true; +} + +static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc *src, struct d3d12_device *device) +{ + void *view; + + /* Some games, e.g. Shadow of the Tomb Raider, GRID 2019, and Horizon Zero Dawn, write descriptors + * from multiple threads without synchronisation. This is apparently valid in Windows. */ + for (;;) + { + do + { + view = src->s.u.object; + } while (view && !vkd3d_view_incref(view)); + + /* Check if the object is still in src to handle the case where it was + * already freed and reused elsewhere when the refcount was incremented.
*/ + if (view == src->s.u.object) + return view; + + vkd3d_view_decref(view, device); + } +} + static inline struct d3d12_desc *d3d12_desc_from_cpu_handle(D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle) { return (struct d3d12_desc *)cpu_handle.ptr; @@ -761,13 +893,12 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device * void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, - D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view); + D3D12_GPU_VIRTUAL_ADDRESS gpu_address, D3D12_ROOT_PARAMETER_TYPE parameter_type, VkBufferView *vk_buffer_view); HRESULT vkd3d_create_static_sampler(struct d3d12_device *device, const D3D12_STATIC_SAMPLER_DESC *desc, VkSampler *vk_sampler); struct d3d12_rtv_desc { - uint32_t magic; VkSampleCountFlagBits sample_count; const struct vkd3d_format *format; uint64_t width; @@ -787,7 +918,6 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev struct d3d12_dsv_desc { - uint32_t magic; VkSampleCountFlagBits sample_count; const struct vkd3d_format *format; uint64_t width; @@ -837,15 +967,10 @@ struct vkd3d_vk_descriptor_heap_layout VkDescriptorSetLayout vk_set_layout; }; -#define VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE 64 - struct d3d12_descriptor_heap_vk_set { VkDescriptorSet vk_set; - VkDescriptorBufferInfo vk_buffer_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; - VkBufferView vk_buffer_views[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; - VkDescriptorImageInfo vk_image_infos[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; - VkWriteDescriptorSet vk_descriptor_writes[VKD3D_DESCRIPTOR_WRITE_BUFFER_SIZE]; + VkDescriptorType vk_type; }; /* ID3D12DescriptorHeap */ @@ -865,9 +990,13 @@ struct d3d12_descriptor_heap struct d3d12_descriptor_heap_vk_set vk_descriptor_sets[VKD3D_SET_INDEX_COUNT]; struct vkd3d_mutex vk_sets_mutex; - BYTE descriptors[]; + unsigned int volatile dirty_list_head; + + 
uint8_t DECLSPEC_ALIGN(sizeof(void *)) descriptors[]; }; +void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device); + static inline struct d3d12_descriptor_heap *d3d12_desc_get_descriptor_heap(const struct d3d12_desc *descriptor) { return CONTAINING_RECORD(descriptor - descriptor->index, struct d3d12_descriptor_heap, descriptors); @@ -882,22 +1011,6 @@ static inline unsigned int d3d12_desc_heap_range_size(const struct d3d12_desc *d HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc, struct d3d12_descriptor_heap **descriptor_heap); -struct d3d12_desc_copy_location -{ - struct d3d12_desc src; - struct d3d12_desc *dst; -}; - -struct d3d12_desc_copy_info -{ - unsigned int count; - bool uav_counter; -}; - -void d3d12_desc_copy_vk_heap_range(struct d3d12_desc_copy_location *locations, const struct d3d12_desc_copy_info *info, - struct d3d12_descriptor_heap *descriptor_heap, enum vkd3d_vk_descriptor_set_index set, - struct d3d12_device *device); - /* ID3D12QueryHeap */ struct d3d12_query_heap { @@ -1295,6 +1408,8 @@ struct d3d12_command_list VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT]; void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point); + struct d3d12_descriptor_heap *descriptor_heaps[64]; + unsigned int descriptor_heap_count; struct vkd3d_private_store private_store; }; @@ -1485,6 +1600,12 @@ struct vkd3d_uav_clear_state HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); +struct vkd3d_desc_object_cache +{ + void * volatile head; + size_t size; +}; + #define VKD3D_DESCRIPTOR_POOL_COUNT 6 /* ID3D12Device */ @@ -1502,7 +1623,8 @@ struct d3d12_device struct vkd3d_gpu_va_allocator gpu_va_allocator; struct vkd3d_mutex mutex; - struct 
vkd3d_mutex desc_mutex[8]; + struct vkd3d_desc_object_cache view_desc_cache; + struct vkd3d_desc_object_cache cbuffer_desc_cache; struct vkd3d_render_pass_cache render_pass_cache; VkPipelineCache vk_pipeline_cache; @@ -1544,6 +1666,7 @@ struct d3d12_device struct vkd3d_uav_clear_state uav_clear_state; VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; + unsigned int vk_pool_count; struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; bool use_vk_heaps; }; @@ -1577,19 +1700,6 @@ static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(str return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); } -static inline struct vkd3d_mutex *d3d12_device_get_descriptor_mutex(struct d3d12_device *device, - const struct d3d12_desc *descriptor) -{ - STATIC_ASSERT(!(ARRAY_SIZE(device->desc_mutex) & (ARRAY_SIZE(device->desc_mutex) - 1))); - uintptr_t idx = (uintptr_t)descriptor; - - idx ^= idx >> 12; - idx ^= idx >> 6; - idx ^= idx >> 3; - - return &device->desc_mutex[idx & (ARRAY_SIZE(device->desc_mutex) - 1)]; -} - /* utils */ enum vkd3d_format_type { -- 2.40.1