diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch index 096b0f86..77b0aa75 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch @@ -1,4 +1,4 @@ -From bd0a25db493913e053d172d97ec776dc43ab844b Mon Sep 17 00:00:00 2001 +From fef220fc5d2e7bfae4b08bf0c6c5fb75aab4cf3d Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 7 Mar 2024 10:40:41 +1100 Subject: [PATCH] Updated vkd3d to 4a209efb6278586d412ceb0a7cbe21e6769a7367. diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch index 231be66f..72143828 100644 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch @@ -1,4 +1,4 @@ -From 6164fccbfff38a799909e855255b0137d8e5fc2d Mon Sep 17 00:00:00 2001 +From f9c536f34a5158f0039748e5ef5f08afecbc67cf Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 23 Apr 2024 08:01:19 +1000 Subject: [PATCH] Updated vkd3d to 7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6eaeb4. diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch index 644ce00d..a7aeeacd 100644 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch +++ b/patches/vkd3d-latest/0003-Updated-vkd3d-to-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch @@ -1,4 +1,4 @@ -From cbe529eb436f19a2f0a8654fe6a49d6566c3e6a2 Mon Sep 17 00:00:00 2001 +From b0a4c2becfa8f65d1751256d5297cd4319a2605d Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 24 Apr 2024 09:05:20 +1000 Subject: [PATCH] Updated vkd3d to 46fca3f9f4a9b47b32e9dfbacda0f3d19536c02c. diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-13e1491941a1af32ddfc1019fa304231fd1.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-13e1491941a1af32ddfc1019fa304231fd1.patch new file mode 100644 index 00000000..8d944114 --- /dev/null +++ b/patches/vkd3d-latest/0004-Updated-vkd3d-to-13e1491941a1af32ddfc1019fa304231fd1.patch @@ -0,0 +1,953 @@ +From 669446d32104b9a4b879f5cb2ad46a964606fa70 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Sun, 28 Apr 2024 09:46:44 +1000 +Subject: [PATCH] Updated vkd3d to 13e1491941a1af32ddfc1019fa304231fd121c4d. + +--- + libs/vkd3d/include/private/vkd3d_common.h | 63 +++++++ + libs/vkd3d/include/vkd3d_types.h | 6 + + libs/vkd3d/libs/vkd3d-common/blob.c | 3 + + libs/vkd3d/libs/vkd3d-common/error.c | 6 + + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 61 ++++++- + libs/vkd3d/libs/vkd3d-shader/ir.c | 122 +++++++++++++- + libs/vkd3d/libs/vkd3d/cache.c | 195 ++++++++++++++++++++++ + libs/vkd3d/libs/vkd3d/device.c | 36 +++- + libs/vkd3d/libs/vkd3d/resource.c | 24 +-- + libs/vkd3d/libs/vkd3d/vkd3d_main.c | 2 +- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 92 ++-------- + 11 files changed, 497 insertions(+), 113 deletions(-) + +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index b0e9230dab6..2d950b4f7aa 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -340,6 +340,11 @@ static inline int vkd3d_u32_compare(uint32_t x, uint32_t y) + return (x > y) - (x < y); + } + ++static inline int vkd3d_u64_compare(uint64_t x, uint64_t y) ++{ ++ return (x > y) - (x < y); ++} ++ + #define VKD3D_BITMAP_SIZE(x) (((x) + 0x1f) >> 5) + + static inline bool bitmap_clear(uint32_t *map, unsigned int idx) +@@ -431,6 +436,64 @@ static inline uint32_t vkd3d_atomic_increment_u32(uint32_t volatile *x) + return vkd3d_atomic_add_fetch_u32(x, 1); + } + ++static inline bool vkd3d_atomic_compare_exchange_u32(uint32_t volatile *x, uint32_t expected, uint32_t val) ++{ ++#if HAVE_SYNC_BOOL_COMPARE_AND_SWAP ++ return __sync_bool_compare_and_swap(x, expected, val); ++#elif defined(_WIN32) ++ return InterlockedCompareExchange((LONG *)x, val, expected) == expected; ++#else ++# error "vkd3d_atomic_compare_exchange_u32() not implemented for this platform" ++#endif ++} ++ ++static inline bool vkd3d_atomic_compare_exchange_ptr(void * volatile *x, void *expected, void *val) ++{ ++#if HAVE_SYNC_BOOL_COMPARE_AND_SWAP ++ return __sync_bool_compare_and_swap(x, expected, val); ++#elif defined(_WIN32) ++ return InterlockedCompareExchangePointer(x, val, expected) == expected; ++#else ++# error "vkd3d_atomic_compare_exchange_ptr() not implemented for this platform" ++#endif ++} ++ ++static inline uint32_t vkd3d_atomic_exchange_u32(uint32_t volatile *x, uint32_t val) ++{ ++#if HAVE_ATOMIC_EXCHANGE_N ++ return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); ++#elif defined(_WIN32) ++ return InterlockedExchange((LONG *)x, val); ++#else ++ uint32_t expected; ++ ++ do ++ { ++ expected = *x; ++ } while (!vkd3d_atomic_compare_exchange_u32(x, expected, val)); ++ ++ return expected; ++#endif ++} ++ ++static inline void *vkd3d_atomic_exchange_ptr(void * volatile *x, void *val) ++{ ++#if HAVE_ATOMIC_EXCHANGE_N ++ return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); ++#elif defined(_WIN32) ++ return InterlockedExchangePointer(x, val); ++#else ++ void *expected; ++ ++ do ++ { ++ expected = *x; ++ } while (!vkd3d_atomic_compare_exchange_ptr(x, expected, val)); ++ ++ return expected; ++#endif ++} ++ + struct vkd3d_mutex + { + #ifdef _WIN32 +diff --git a/libs/vkd3d/include/vkd3d_types.h b/libs/vkd3d/include/vkd3d_types.h +index 017eaf11806..dc5a7c064ae 100644 +--- a/libs/vkd3d/include/vkd3d_types.h ++++ b/libs/vkd3d/include/vkd3d_types.h +@@ -53,6 +53,12 @@ enum vkd3d_result + VKD3D_ERROR_INVALID_SHADER = -4, + /** The operation is not implemented in this version of vkd3d. */ + VKD3D_ERROR_NOT_IMPLEMENTED = -5, ++ /** The object or entry already exists. \since 1.12 */ ++ VKD3D_ERROR_KEY_ALREADY_EXISTS = -6, ++ /** The requested object was not found. \since 1.12 */ ++ VKD3D_ERROR_NOT_FOUND = -7, ++ /** The output buffer is larger than the requested object \since 1.12. */ ++ VKD3D_ERROR_MORE_DATA = -8, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_RESULT), + }; +diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c +index 6bc95dc55c4..c2c6ad67804 100644 +--- a/libs/vkd3d/libs/vkd3d-common/blob.c ++++ b/libs/vkd3d/libs/vkd3d-common/blob.c +@@ -16,6 +16,9 @@ + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + ++#ifndef __MINGW32__ ++#define WIDL_C_INLINE_WRAPPERS ++#endif + #define COBJMACROS + + #define CONST_VTABLE +diff --git a/libs/vkd3d/libs/vkd3d-common/error.c b/libs/vkd3d/libs/vkd3d-common/error.c +index b8350a5404c..2f978c4977d 100644 +--- a/libs/vkd3d/libs/vkd3d-common/error.c ++++ b/libs/vkd3d/libs/vkd3d-common/error.c +@@ -35,6 +35,12 @@ HRESULT hresult_from_vkd3d_result(int vkd3d_result) + return E_INVALIDARG; + case VKD3D_ERROR_NOT_IMPLEMENTED: + return E_NOTIMPL; ++ case VKD3D_ERROR_KEY_ALREADY_EXISTS: ++ return DXGI_ERROR_ALREADY_EXISTS; ++ case VKD3D_ERROR_NOT_FOUND: ++ return DXGI_ERROR_NOT_FOUND; ++ case VKD3D_ERROR_MORE_DATA: ++ return DXGI_ERROR_MORE_DATA; + default: + FIXME("Unhandled vkd3d result %d.\n", vkd3d_result); + return E_FAIL; +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 55d9ecf707d..09e4f596241 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1813,6 +1813,7 @@ static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) + struct sm1_instruction + { + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; ++ unsigned int flags; + + struct sm1_dst_register + { +@@ -1852,6 +1853,8 @@ static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu + uint32_t token = instr->opcode; + unsigned int i; + ++ token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); ++ + if (ctx->profile->major_version > 1) + token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); +@@ -2414,6 +2417,49 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + } + ++static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ const struct hlsl_block *block); ++ ++static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++{ ++ const struct hlsl_ir_if *iff = hlsl_ir_if(instr); ++ const struct hlsl_ir_node *condition; ++ struct sm1_instruction sm1_ifc, sm1_else, sm1_endif; ++ ++ condition = iff->condition.node; ++ assert(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); ++ ++ sm1_ifc = (struct sm1_instruction) ++ { ++ .opcode = D3DSIO_IFC, ++ .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. */ ++ ++ .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), ++ .srcs[0].reg = condition->reg.id, ++ .srcs[0].mod = 0, ++ ++ .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), ++ .srcs[1].reg = condition->reg.id, ++ .srcs[1].mod = D3DSPSM_NEG, ++ ++ .src_count = 2, ++ }; ++ write_sm1_instruction(ctx, buffer, &sm1_ifc); ++ write_sm1_block(ctx, buffer, &iff->then_block); ++ ++ if (!list_empty(&iff->else_block.instrs)) ++ { ++ sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; ++ write_sm1_instruction(ctx, buffer, &sm1_else); ++ write_sm1_block(ctx, buffer, &iff->else_block); ++ } ++ ++ sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; ++ write_sm1_instruction(ctx, buffer, &sm1_endif); ++} ++ + static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); +@@ -2614,12 +2660,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + write_sm1_instruction(ctx, buffer, &sm1_instr); + } + +-static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_function_decl *entry_func) ++static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++ const struct hlsl_block *block) + { + const struct hlsl_ir_node *instr; + +- LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) ++ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { +@@ -2643,6 +2689,13 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + write_sm1_expr(ctx, buffer, instr); + break; + ++ case HLSL_IR_IF: ++ if (hlsl_version_ge(ctx, 2, 1)) ++ write_sm1_if(ctx, buffer, instr); ++ else ++ hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); ++ break; ++ + case HLSL_IR_JUMP: + write_sm1_jump(ctx, buffer, instr); + break; +@@ -2680,7 +2733,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun + write_sm1_constant_defs(ctx, &buffer); + write_sm1_semantic_dcls(ctx, &buffer); + write_sm1_sampler_dcls(ctx, &buffer); +- write_sm1_instructions(ctx, &buffer, entry_func); ++ write_sm1_block(ctx, &buffer, &entry_func->body); + + put_u32(&buffer, D3DSIO_END); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 59b74c065d8..121b0fe3a6c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -82,6 +82,106 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, + return true; + } + ++static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_data_type data_type, ++ enum vkd3d_shader_opcode *opcode, bool *requires_swap) ++{ ++ switch (rel_op) ++ { ++ case VKD3D_SHADER_REL_OP_LT: ++ case VKD3D_SHADER_REL_OP_GT: ++ *requires_swap = (rel_op == VKD3D_SHADER_REL_OP_GT); ++ if (data_type == VKD3D_DATA_FLOAT) ++ { ++ *opcode = VKD3DSIH_LTO; ++ return true; ++ } ++ break; ++ ++ case VKD3D_SHADER_REL_OP_GE: ++ case VKD3D_SHADER_REL_OP_LE: ++ *requires_swap = (rel_op == VKD3D_SHADER_REL_OP_LE); ++ if (data_type == VKD3D_DATA_FLOAT) ++ { ++ *opcode = VKD3DSIH_GEO; ++ return true; ++ } ++ break; ++ ++ case VKD3D_SHADER_REL_OP_EQ: ++ *requires_swap = false; ++ if (data_type == VKD3D_DATA_FLOAT) ++ { ++ *opcode = VKD3DSIH_EQO; ++ return true; ++ } ++ break; ++ ++ case VKD3D_SHADER_REL_OP_NE: ++ *requires_swap = false; ++ if (data_type == VKD3D_DATA_FLOAT) ++ { ++ *opcode = VKD3DSIH_NEO; ++ return true; ++ } ++ break; ++ } ++ return false; ++} ++ ++static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, ++ struct vkd3d_shader_instruction *ifc, unsigned int *tmp_idx, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ size_t pos = ifc - instructions->elements; ++ struct vkd3d_shader_instruction *ins; ++ enum vkd3d_shader_opcode opcode; ++ bool swap; ++ ++ if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ if (*tmp_idx == ~0u) ++ *tmp_idx = program->temp_count++; ++ ++ /* Replace ifc comparison with actual comparison, saving the result in the tmp register. */ ++ if (!(get_opcode_from_rel_op(ifc->flags, ifc->src[0].reg.data_type, &opcode, &swap))) ++ { ++ vkd3d_shader_error(message_context, &ifc->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Aborting due to not yet implemented feature: opcode for rel_op %u and data type %u.", ++ ifc->flags, ifc->src[0].reg.data_type); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ ++ ins = &instructions->elements[pos + 1]; ++ if (!vsir_instruction_init_with_params(program, ins, &ifc->location, opcode, 1, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->dst[0].reg.idx[0].offset = *tmp_idx; ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; ++ ++ ins->src[0] = ifc->src[swap]; ++ ins->src[1] = ifc->src[!swap]; ++ ++ /* Create new if instruction using the previous result. */ ++ ins = &instructions->elements[pos + 2]; ++ if (!vsir_instruction_init_with_params(program, ins, &ifc->location, VKD3DSIH_IF, 0, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; ++ ++ vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[0].reg.idx[0].offset = *tmp_idx; ++ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ ++ /* Make the original instruction no-op */ ++ vkd3d_shader_instruction_make_nop(ifc); ++ ++ return VKD3D_OK; ++} ++ + static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program, + struct vkd3d_shader_instruction *texkill, unsigned int *tmp_idx) + { +@@ -210,7 +310,8 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro + return VKD3D_OK; + } + +-static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program) ++static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) + { + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + unsigned int tmp_idx = ~0u, i; +@@ -222,6 +323,11 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr + + switch (ins->handler_idx) + { ++ case VKD3DSIH_IFC: ++ if ((ret = vsir_program_lower_ifc(program, ins, &tmp_idx, message_context)) < 0) ++ return ret; ++ break; ++ + case VKD3DSIH_TEXKILL: + if ((ret = vsir_program_lower_texkill(program, ins, &tmp_idx)) < 0) + return ret; +@@ -4992,12 +5098,12 @@ static void register_map_undominated_use(struct vkd3d_shader_register *reg, stru + { + unsigned int i; + +- if (!register_is_ssa(reg)) +- return; +- +- i = reg->idx[0].offset; +- if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) +- alloc->table[i] = alloc->next_temp_idx++; ++ if (register_is_ssa(reg)) ++ { ++ i = reg->idx[0].offset; ++ if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block)) ++ alloc->table[i] = alloc->next_temp_idx++; ++ } + + for (i = 0; i < reg->idx_count; ++i) + if (reg->idx[i].rel_addr) +@@ -6056,7 +6162,7 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t + { + enum vkd3d_result result = VKD3D_OK; + +- if ((result = vsir_program_lower_instructions(program)) < 0) ++ if ((result = vsir_program_lower_instructions(program, message_context)) < 0) + return result; + + if (program->shader_version.major >= 6) +diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c +index 56ba6990420..a0a29ed30cb 100644 +--- a/libs/vkd3d/libs/vkd3d/cache.c ++++ b/libs/vkd3d/libs/vkd3d/cache.c +@@ -18,11 +18,60 @@ + + #include "vkd3d_private.h" + ++struct vkd3d_cache_entry_header ++{ ++ uint64_t hash; ++ uint64_t key_size; ++ uint64_t value_size; ++}; ++ + struct vkd3d_shader_cache + { + unsigned int refcount; ++ struct vkd3d_mutex lock; ++ ++ struct rb_tree tree; + }; + ++struct shader_cache_entry ++{ ++ struct vkd3d_cache_entry_header h; ++ struct rb_entry entry; ++ uint8_t *payload; ++}; ++ ++struct shader_cache_key ++{ ++ uint64_t hash; ++ const void *key; ++ uint64_t key_size; ++}; ++ ++static int vkd3d_shader_cache_compare_key(const void *key, const struct rb_entry *entry) ++{ ++ const struct shader_cache_entry *e = RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry); ++ const struct shader_cache_key *k = key; ++ int ret; ++ ++ if ((ret = vkd3d_u64_compare(k->hash, e->h.hash))) ++ return ret; ++ if ((ret = vkd3d_u64_compare(k->key_size, e->h.key_size))) ++ return ret; ++ ++ /* Until now we have not seen an actual hash collision. If the key didn't match it was always ++ * due to a bug in the serialization code or memory corruption. If you see this FIXME please ++ * investigate. */ ++ if ((ret = memcmp(k->key, e->payload, k->key_size))) ++ FIXME("Actual case of a hash collision found.\n"); ++ return ret; ++} ++ ++static void vkd3d_shader_cache_add_entry(struct vkd3d_shader_cache *cache, ++ struct shader_cache_entry *e) ++{ ++ rb_put(&cache->tree, &e->h.hash, &e->entry); ++} ++ + int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) + { + struct vkd3d_shader_cache *object; +@@ -34,6 +83,9 @@ int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) + return VKD3D_ERROR_OUT_OF_MEMORY; + + object->refcount = 1; ++ rb_init(&object->tree, vkd3d_shader_cache_compare_key); ++ vkd3d_mutex_init(&object->lock); ++ + *cache = object; + + return VKD3D_OK; +@@ -46,6 +98,13 @@ unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache) + return refcount; + } + ++static void vkd3d_shader_cache_destroy_entry(struct rb_entry *entry, void *context) ++{ ++ struct shader_cache_entry *e = RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry); ++ vkd3d_free(e->payload); ++ vkd3d_free(e); ++} ++ + unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache) + { + unsigned int refcount = vkd3d_atomic_decrement_u32(&cache->refcount); +@@ -54,6 +113,142 @@ unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache) + if (refcount) + return refcount; + ++ rb_destroy(&cache->tree, vkd3d_shader_cache_destroy_entry, NULL); ++ vkd3d_mutex_destroy(&cache->lock); ++ + vkd3d_free(cache); + return 0; + } ++ ++static uint64_t vkd3d_shader_cache_hash_key(const void *key, size_t size) ++{ ++ static const uint64_t fnv_prime = 0x00000100000001b3; ++ uint64_t hash = 0xcbf29ce484222325; ++ const uint8_t *k = key; ++ size_t i; ++ ++ for (i = 0; i < size; ++i) ++ hash = (hash ^ k[i]) * fnv_prime; ++ ++ return hash; ++} ++ ++static void vkd3d_shader_cache_lock(struct vkd3d_shader_cache *cache) ++{ ++ vkd3d_mutex_lock(&cache->lock); ++} ++ ++static void vkd3d_shader_cache_unlock(struct vkd3d_shader_cache *cache) ++{ ++ vkd3d_mutex_unlock(&cache->lock); ++} ++ ++int vkd3d_shader_cache_put(struct vkd3d_shader_cache *cache, ++ const void *key, size_t key_size, const void *value, size_t value_size) ++{ ++ struct shader_cache_entry *e; ++ struct shader_cache_key k; ++ struct rb_entry *entry; ++ enum vkd3d_result ret; ++ ++ TRACE("%p, %p, %#zx, %p, %#zx.\n", cache, key, key_size, value, value_size); ++ ++ k.hash = vkd3d_shader_cache_hash_key(key, key_size); ++ k.key = key; ++ k.key_size = key_size; ++ ++ vkd3d_shader_cache_lock(cache); ++ ++ entry = rb_get(&cache->tree, &k); ++ e = entry ? RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry) : NULL; ++ ++ if (e) ++ { ++ WARN("Key already exists, returning VKD3D_ERROR_KEY_ALREADY_EXISTS.\n"); ++ ret = VKD3D_ERROR_KEY_ALREADY_EXISTS; ++ goto done; ++ } ++ ++ e = vkd3d_malloc(sizeof(*e)); ++ if (!e) ++ { ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; ++ goto done; ++ } ++ e->payload = vkd3d_malloc(key_size + value_size); ++ if (!e->payload) ++ { ++ vkd3d_free(e); ++ ret = VKD3D_ERROR_OUT_OF_MEMORY; ++ goto done; ++ } ++ ++ e->h.key_size = key_size; ++ e->h.value_size = value_size; ++ e->h.hash = k.hash; ++ memcpy(e->payload, key, key_size); ++ memcpy(e->payload + key_size, value, value_size); ++ ++ vkd3d_shader_cache_add_entry(cache, e); ++ TRACE("Cache entry %#"PRIx64" stored.\n", k.hash); ++ ret = VKD3D_OK; ++ ++done: ++ vkd3d_shader_cache_unlock(cache); ++ return ret; ++} ++ ++int vkd3d_shader_cache_get(struct vkd3d_shader_cache *cache, ++ const void *key, size_t key_size, void *value, size_t *value_size) ++{ ++ struct shader_cache_entry *e; ++ struct shader_cache_key k; ++ struct rb_entry *entry; ++ enum vkd3d_result ret; ++ size_t size_in; ++ ++ TRACE("%p, %p, %#zx, %p, %p.\n", cache, key, key_size, value, value_size); ++ ++ size_in = *value_size; ++ ++ k.hash = vkd3d_shader_cache_hash_key(key, key_size); ++ k.key = key; ++ k.key_size = key_size; ++ ++ vkd3d_shader_cache_lock(cache); ++ ++ entry = rb_get(&cache->tree, &k); ++ if (!entry) ++ { ++ WARN("Entry not found.\n"); ++ ret = VKD3D_ERROR_NOT_FOUND; ++ goto done; ++ } ++ ++ e = RB_ENTRY_VALUE(entry, struct shader_cache_entry, entry); ++ ++ *value_size = e->h.value_size; ++ if (!value) ++ { ++ TRACE("Found item %#"PRIx64", returning needed size %#"PRIx64".\n", ++ e->h.hash, e->h.value_size); ++ ret = VKD3D_OK; ++ goto done; ++ } ++ ++ if (size_in < e->h.value_size) ++ { ++ WARN("Output buffer is too small for item %#"PRIx64", got %#zx want %#"PRIx64".\n", ++ e->h.hash, size_in, e->h.value_size); ++ ret = VKD3D_ERROR_MORE_DATA; ++ goto done; ++ } ++ ++ memcpy(value, e->payload + e->h.key_size, e->h.value_size); ++ ret = VKD3D_OK; ++ TRACE("Returning cached item %#"PRIx64".\n", e->h.hash); ++ ++done: ++ vkd3d_shader_cache_unlock(cache); ++ return ret; ++} +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index a394e3f7592..cb2b6ad0364 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -2685,19 +2685,43 @@ static HRESULT STDMETHODCALLTYPE d3d12_cache_session_GetDevice(ID3D12ShaderCache + static HRESULT STDMETHODCALLTYPE d3d12_cache_session_FindValue(ID3D12ShaderCacheSession *iface, + const void *key, UINT key_size, void *value, UINT *value_size) + { +- FIXME("iface %p, key %p, key_size %#x, value %p, value_size %p stub!\n", ++ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); ++ enum vkd3d_result ret; ++ size_t size; ++ ++ TRACE("iface %p, key %p, key_size %#x, value %p, value_size %p.\n", + iface, key, key_size, value, value_size); + +- return DXGI_ERROR_NOT_FOUND; ++ if (!value_size) ++ { ++ WARN("value_size is NULL, returning E_INVALIDARG.\n"); ++ return E_INVALIDARG; ++ } ++ ++ size = *value_size; ++ ret = vkd3d_shader_cache_get(session->cache, key, key_size, value, &size); ++ *value_size = size; ++ ++ return hresult_from_vkd3d_result(ret); + } + + static HRESULT STDMETHODCALLTYPE d3d12_cache_session_StoreValue(ID3D12ShaderCacheSession *iface, + const void *key, UINT key_size, const void *value, UINT value_size) + { +- FIXME("iface %p, key %p, key_size %#x, value %p, value_size %u stub!\n", iface, key, key_size, +- value, value_size); ++ struct d3d12_cache_session *session = impl_from_ID3D12ShaderCacheSession(iface); ++ enum vkd3d_result ret; + +- return E_NOTIMPL; ++ TRACE("iface %p, key %p, key_size %#x, value %p, value_size %u.\n", ++ iface, key, key_size, value, value_size); ++ ++ if (!key || !key_size || !value || !value_size) ++ { ++ WARN("Invalid input parameters, returning E_INVALIDARG.\n"); ++ return E_INVALIDARG; ++ } ++ ++ ret = vkd3d_shader_cache_put(session->cache, key, key_size, value, value_size); ++ return hresult_from_vkd3d_result(ret); + } + + static void STDMETHODCALLTYPE d3d12_cache_session_SetDeleteOnDestroy(ID3D12ShaderCacheSession *iface) +@@ -2833,7 +2857,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device9 *ifac + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { +- ID3D12Device_AddRef(iface); ++ ID3D12Device9_AddRef(iface); + *object = iface; + return S_OK; + } +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index 179999148bc..7a2f464c98e 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -1271,7 +1271,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource2 * + || IsEqualGUID(riid, &IID_ID3D12Object) + || IsEqualGUID(riid, &IID_IUnknown)) + { +- ID3D12Resource_AddRef(iface); ++ ID3D12Resource2_AddRef(iface); + *object = iface; + return S_OK; + } +@@ -2350,16 +2350,16 @@ static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) + i = vkd3d_atomic_increment_u32(&cache->next_index) & HEAD_INDEX_MASK; + for (;;) + { +- if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) ++ if (vkd3d_atomic_compare_exchange_u32(&cache->heads[i].spinlock, 0, 1)) + { + if ((u.object = cache->heads[i].head)) + { + vkd3d_atomic_decrement_u32(&cache->free_count); + cache->heads[i].head = u.header->next; +- vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); ++ vkd3d_atomic_exchange_u32(&cache->heads[i].spinlock, 0); + return u.object; + } +- vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); ++ vkd3d_atomic_exchange_u32(&cache->heads[i].spinlock, 0); + } + /* Keeping a free count avoids uncertainty over when this loop should terminate, + * which could result in excess allocations gradually increasing without limit. */ +@@ -2381,7 +2381,7 @@ static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, + i = vkd3d_atomic_increment_u32(&cache->next_index) & HEAD_INDEX_MASK; + for (;;) + { +- if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1)) ++ if (vkd3d_atomic_compare_exchange_u32(&cache->heads[i].spinlock, 0, 1)) + break; + i = (i + 1) & HEAD_INDEX_MASK; + } +@@ -2389,7 +2389,7 @@ static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, + head = cache->heads[i].head; + u.header->next = head; + cache->heads[i].head = u.object; +- vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0); ++ vkd3d_atomic_exchange_u32(&cache->heads[i].spinlock, 0); + vkd3d_atomic_increment_u32(&cache->free_count); + } + +@@ -2473,7 +2473,7 @@ void vkd3d_view_decref(void *view, struct d3d12_device *device) + + static inline void d3d12_desc_replace(struct d3d12_desc *dst, void *view, struct d3d12_device *device) + { +- if ((view = vkd3d_atomic_exchange_pointer(&dst->s.u.object, view))) ++ if ((view = vkd3d_atomic_exchange_ptr(&dst->s.u.object, view))) + vkd3d_view_decref(view, device); + } + +@@ -2652,7 +2652,7 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descr + union d3d12_desc_object u; + unsigned int i, next; + +- if ((i = vkd3d_atomic_exchange(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) ++ if ((i = vkd3d_atomic_exchange_u32(&descriptor_heap->dirty_list_head, UINT_MAX)) == UINT_MAX) + return; + + writes.null_vk_cbv_info.buffer = VK_NULL_HANDLE; +@@ -2667,7 +2667,7 @@ void d3d12_desc_flush_vk_heap_updates_locked(struct d3d12_descriptor_heap *descr + for (; i != UINT_MAX; i = next) + { + src = &descriptors[i]; +- next = vkd3d_atomic_exchange(&src->next, 0); ++ next = vkd3d_atomic_exchange_u32(&src->next, 0); + next = (int)next >> 1; + + /* A race exists here between updating src->next and getting the current object. The best +@@ -2695,13 +2695,13 @@ static void d3d12_desc_mark_as_modified(struct d3d12_desc *dst, struct d3d12_des + head = descriptor_heap->dirty_list_head; + + /* Only one thread can swap the value away from zero. */ +- if (!vkd3d_atomic_compare_exchange(&dst->next, 0, (head << 1) | 1)) ++ if (!vkd3d_atomic_compare_exchange_u32(&dst->next, 0, (head << 1) | 1)) + return; + /* Now it is safe to modify 'next' to another nonzero value if necessary. */ +- while (!vkd3d_atomic_compare_exchange(&descriptor_heap->dirty_list_head, head, i)) ++ while (!vkd3d_atomic_compare_exchange_u32(&descriptor_heap->dirty_list_head, head, i)) + { + head = descriptor_heap->dirty_list_head; +- vkd3d_atomic_exchange(&dst->next, (head << 1) | 1); ++ vkd3d_atomic_exchange_u32(&dst->next, (head << 1) | 1); + } + } + +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c +index 29305fbdc63..c7431bd821b 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c +@@ -71,7 +71,7 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, + + if (!device) + { +- ID3D12Device_Release(&object->ID3D12Device9_iface); ++ ID3D12Device9_Release(&object->ID3D12Device9_iface); + return S_FALSE; + } + +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index 5f60c8d90ad..d1fa866d9e3 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -19,6 +19,9 @@ + #ifndef __VKD3D_PRIVATE_H + #define __VKD3D_PRIVATE_H + ++#ifndef __MINGW32__ ++#define WIDL_C_INLINE_WRAPPERS ++#endif + #define COBJMACROS + #define NONAMELESSUNION + #define VK_NO_PROTOTYPES +@@ -194,93 +197,14 @@ struct vkd3d_instance + unsigned int refcount; + }; + +-#ifdef _WIN32 +- +-union vkd3d_thread_handle +-{ +- void *handle; +-}; +- +-static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) +-{ +- return InterlockedCompareExchange((LONG volatile *)x, xchg, cmp) == cmp; +-} +- +-static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) +-{ +- return InterlockedExchange((LONG volatile *)x, val); +-} +- +-static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) +-{ +- return InterlockedCompareExchangePointer(x, xchg, cmp) == cmp; +-} +- +-static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) +-{ +- return InterlockedExchangePointer(x, val); +-} +- +-#else /* _WIN32 */ +- +-#include +- + union vkd3d_thread_handle + { ++#ifndef _WIN32 + pthread_t pthread; ++#endif + void *handle; + }; + +-# if HAVE_SYNC_BOOL_COMPARE_AND_SWAP +-static inline bool vkd3d_atomic_compare_exchange(unsigned int volatile *x, unsigned int cmp, unsigned int xchg) +-{ +- return __sync_bool_compare_and_swap(x, cmp, xchg); +-} +- +-static inline bool vkd3d_atomic_compare_exchange_pointer(void * volatile *x, void *cmp, void *xchg) +-{ +- return __sync_bool_compare_and_swap(x, cmp, xchg); +-} +-# else +-# error "vkd3d_atomic_compare_exchange() not implemented for this platform" +-# endif +- +-# if HAVE_ATOMIC_EXCHANGE_N +-static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) +-{ +- return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); +-} +- +-static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) +-{ +- return __atomic_exchange_n(x, val, __ATOMIC_SEQ_CST); +-} +-# elif HAVE_SYNC_BOOL_COMPARE_AND_SWAP +-static inline unsigned int vkd3d_atomic_exchange(unsigned int volatile *x, unsigned int val) +-{ +- unsigned int i; +- do +- { +- i = *x; +- } while (!__sync_bool_compare_and_swap(x, i, val)); +- return i; +-} +- +-static inline void *vkd3d_atomic_exchange_pointer(void * volatile *x, void *val) +-{ +- void *p; +- do +- { +- p = *x; +- } while (!__sync_bool_compare_and_swap(x, p, val)); +- return p; +-} +-# else +-# error "vkd3d_atomic_exchange() not implemented for this platform" +-# endif +- +-#endif /* _WIN32 */ +- + HRESULT vkd3d_create_thread(struct vkd3d_instance *instance, + PFN_vkd3d_thread thread_main, void *data, union vkd3d_thread_handle *thread); + HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_handle *thread); +@@ -742,7 +666,7 @@ static inline bool vkd3d_view_incref(void *desc) + if (refcount <= 0) + return false; + } +- while (!vkd3d_atomic_compare_exchange(&h->refcount, refcount, refcount + 1)); ++ while (!vkd3d_atomic_compare_exchange_u32(&h->refcount, refcount, refcount + 1)); + + return true; + } +@@ -1852,5 +1776,9 @@ struct vkd3d_shader_cache; + int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache); + unsigned int vkd3d_shader_cache_incref(struct vkd3d_shader_cache *cache); + unsigned int vkd3d_shader_cache_decref(struct vkd3d_shader_cache *cache); ++int vkd3d_shader_cache_put(struct vkd3d_shader_cache *cache, ++ const void *key, size_t key_size, const void *value, size_t value_size); ++int vkd3d_shader_cache_get(struct vkd3d_shader_cache *cache, ++ const void *key, size_t key_size, void *value, size_t *value_size); + + #endif /* __VKD3D_PRIVATE_H */ +-- +2.43.0 + diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch new file mode 100644 index 00000000..7d13a011 --- /dev/null +++ b/patches/vkd3d-latest/0005-Updated-vkd3d-to-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch @@ -0,0 +1,1497 @@ +From f34d3681ed967342322403d4c62eb9d1d72cdf06 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Fri, 3 May 2024 07:31:39 +1000 +Subject: [PATCH] Updated vkd3d to 62a512c4f8c4070f0f4f3ed8e70b6f0bc885da30. + +--- + libs/vkd3d/include/vkd3d.h | 91 +++++++++- + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 11 ++ + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 22 +-- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 155 +++++++++++++++++- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 39 +++-- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 1 + + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1 + + .../libs/vkd3d-shader/hlsl_constant_ops.c | 130 +++++++++++++++ + libs/vkd3d/libs/vkd3d-shader/spirv.c | 113 +++++++++++-- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 44 ++--- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 31 ++-- + libs/vkd3d/libs/vkd3d/command.c | 21 +-- + libs/vkd3d/libs/vkd3d/device.c | 36 +++- + libs/vkd3d/libs/vkd3d/state.c | 2 + + 14 files changed, 595 insertions(+), 102 deletions(-) + +diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h +index 71c56331d86..38249f0bf5c 100644 +--- a/libs/vkd3d/include/vkd3d.h ++++ b/libs/vkd3d/include/vkd3d.h +@@ -237,47 +237,134 @@ struct vkd3d_host_time_domain_info + uint64_t ticks_per_second; + }; + ++/** ++ * A chained structure containing device creation parameters. ++ */ + struct vkd3d_device_create_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** The minimum feature level to request. Device creation will fail with E_INVALIDARG if the ++ * Vulkan device doesn't have the features needed to fulfill the request. */ + D3D_FEATURE_LEVEL minimum_feature_level; + ++ /** ++ * The vkd3d instance to use to create a device. Either this or instance_create_info must be ++ * set. ++ */ + struct vkd3d_instance *instance; ++ /** ++ * The parameters used to create an instance, which is then used to create a device. Either ++ * this or instance must be set. ++ */ + const struct vkd3d_instance_create_info *instance_create_info; + ++ /** ++ * The Vulkan physical device to use. If it is NULL, the first physical device found is used, ++ * prioritizing discrete GPUs over integrated GPUs and integrated GPUs over all the others. ++ * ++ * This parameter can be overridden by setting environment variable VKD3D_VULKAN_DEVICE. ++ */ + VkPhysicalDevice vk_physical_device; + ++ /** ++ * A list of Vulkan device extensions to request. They are intended as required, so device ++ * creation will fail if any of them is not available. ++ */ + const char * const *device_extensions; ++ /** The number of elements in the device_extensions array. */ + uint32_t device_extension_count; + ++ /** ++ * An object to be set as the device parent. This is not used by vkd3d except for being ++ * returned by vkd3d_get_device_parent. ++ */ + IUnknown *parent; ++ /** ++ * The adapter LUID to be set for the device. This is not used by vkd3d except for being ++ * returned by GetAdapterLuid. ++ */ + LUID adapter_luid; + }; + +-/* Extends vkd3d_device_create_info. Available since 1.2. */ ++/** ++ * A chained structure to specify optional device extensions. ++ * ++ * This structure extends vkd3d_device_create_info. ++ * ++ * \since 1.2 ++ */ + struct vkd3d_optional_device_extensions_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_OPTIONAL_DEVICE_EXTENSIONS_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** ++ * A list of optional Vulkan device extensions to request. Device creation does not fail if ++ * they are not available. ++ */ + const char * const *extensions; ++ /** The number of elements in the extensions array. */ + uint32_t extension_count; + }; + +-/* vkd3d_image_resource_create_info flags */ ++/** ++ * When specified as a flag of vkd3d_image_resource_create_info, it means that vkd3d will do the ++ * initial transition operation on the image from VK_IMAGE_LAYOUT_UNDEFINED to its appropriate ++ * Vulkan layout (depending on its D3D12 resource state). If this flag is not specified the caller ++ * is responsible for transitioning the Vulkan image to the appropriate layout. ++ */ + #define VKD3D_RESOURCE_INITIAL_STATE_TRANSITION 0x00000001 ++/** ++ * When specified as a flag of vkd3d_image_resource_create_info, it means that field present_state ++ * is honored. ++ */ + #define VKD3D_RESOURCE_PRESENT_STATE_TRANSITION 0x00000002 + ++/** ++ * A chained structure containing the parameters to create a D3D12 resource backed by a Vulkan ++ * image. ++ */ + struct vkd3d_image_resource_create_info + { ++ /** Must be set to VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO. */ + enum vkd3d_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ + const void *next; + ++ /** The Vulkan image that backs the resource. */ + VkImage vk_image; ++ /** The resource description. */ + D3D12_RESOURCE_DESC desc; ++ /** ++ * A combination of zero or more flags. The valid flags are ++ * VKD3D_RESOURCE_INITIAL_STATE_TRANSITION and VKD3D_RESOURCE_PRESENT_STATE_TRANSITION. ++ */ + unsigned int flags; ++ /** ++ * This field specifies how to handle resource state D3D12_RESOURCE_STATE_PRESENT for ++ * the resource. Notice that on D3D12 there is no difference between ++ * D3D12_RESOURCE_STATE_COMMON and D3D12_RESOURCE_STATE_PRESENT (they have the same value), ++ * while on Vulkan two different layouts are used (VK_IMAGE_LAYOUT_GENERAL and ++ * VK_IMAGE_LAYOUT_PRESENT_SRC_KHR). ++ * ++ * * When flag VKD3D_RESOURCE_PRESENT_STATE_TRANSITION is not specified, field ++ * present_state is ignored and resource state D3D12_RESOURCE_STATE_COMMON/_PRESENT is ++ * mapped to VK_IMAGE_LAYOUT_GENERAL; this is useful for non-swapchain resources. ++ * * Otherwise, when present_state is D3D12_RESOURCE_STATE_PRESENT/_COMMON, resource state ++ * D3D12_RESOURCE_STATE_COMMON/_PRESENT is mapped to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; ++ * this is useful for swapchain resources that are directly backed by a Vulkan swapchain ++ * image. ++ * * Otherwise, resource state D3D12_RESOURCE_STATE_COMMON/_PRESENT is treated as resource ++ * state present_state; this is useful for swapchain resources that backed by a Vulkan ++ * non-swapchain image, which the client will likely consume with a copy or drawing ++ * operation at presentation time. ++ */ + D3D12_RESOURCE_STATES present_state; + }; + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index cd8ba0a7d2b..b2f329cd199 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -327,6 +327,9 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_UTOD ] = "utod", + [VKD3DSIH_UTOF ] = "utof", + [VKD3DSIH_UTOU ] = "utou", ++ [VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL ] = "wave_active_all_equal", ++ [VKD3DSIH_WAVE_ALL_TRUE ] = "wave_all_true", ++ [VKD3DSIH_WAVE_ANY_TRUE ] = "wave_any_true", + [VKD3DSIH_XOR ] = "xor", + }; + +@@ -1161,6 +1164,14 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const + vkd3d_string_buffer_printf(buffer, "sr"); + break; + ++ case VKD3DSPR_WAVELANECOUNT: ++ vkd3d_string_buffer_printf(buffer, "vWaveLaneCount"); ++ break; ++ ++ case VKD3DSPR_WAVELANEINDEX: ++ vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); ++ break; ++ + default: + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->type, compiler->colours.reset); +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 09e4f596241..aa2358440e5 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -215,8 +215,12 @@ struct vkd3d_shader_sm1_parser + + struct vkd3d_shader_parser p; + ++ struct ++ { + #define MAX_CONSTANT_COUNT 8192 +- uint32_t constant_def_mask[3][VKD3D_BITMAP_SIZE(MAX_CONSTANT_COUNT)]; ++ uint32_t def_mask[VKD3D_BITMAP_SIZE(MAX_CONSTANT_COUNT)]; ++ uint32_t count; ++ } constants[3]; + }; + + /* This table is not order or position dependent. */ +@@ -750,15 +754,13 @@ static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser * + static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, + enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def) + { +- struct vkd3d_shader_desc *desc = &sm1->p.shader_desc; +- +- desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1); ++ sm1->constants[set].count = max(sm1->constants[set].count, index + 1); + if (from_def) + { + /* d3d shaders have a maximum of 8192 constants; we should not overrun + * this array. */ +- assert((index / 32) <= ARRAY_SIZE(sm1->constant_def_mask[set])); +- bitmap_set(sm1->constant_def_mask[set], index); ++ assert((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); ++ bitmap_set(sm1->constants[set].def_mask, index); + } + } + +@@ -1301,9 +1303,9 @@ static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1, + /* Find the highest constant index which is not written by a DEF + * instruction. We can't (easily) use an FFZ function for this since it + * needs to be limited by the highest used register index. */ +- for (j = sm1->p.shader_desc.flat_constant_count[set].used; j > 0; --j) ++ for (j = sm1->constants[set].count; j > 0; --j) + { +- if (!bitmap_is_set(sm1->constant_def_mask[set], j - 1)) ++ if (!bitmap_is_set(sm1->constants[set].def_mask, j - 1)) + return j; + } + +@@ -1354,8 +1356,8 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi + ++instructions->count; + } + +- for (i = 0; i < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++i) +- sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); ++ for (i = 0; i < ARRAY_SIZE(sm1->p.program.flat_constant_count); ++i) ++ sm1->p.program.flat_constant_count[i] = get_external_constant_count(sm1, i); + + if (!sm1->p.failed) + ret = vkd3d_shader_parser_validate(&sm1->p); +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 6a1fb6bddb7..e636ad917db 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -31,7 +31,7 @@ static const uint64_t GLOBALVAR_FLAG_EXPLICIT_TYPE = 2; + static const unsigned int GLOBALVAR_ADDRESS_SPACE_SHIFT = 2; + static const uint64_t ALLOCA_FLAG_IN_ALLOCA = 0x20; + static const uint64_t ALLOCA_FLAG_EXPLICIT_TYPE = 0x40; +-static const uint64_t ALLOCA_ALIGNMENT_MASK = ALLOCA_FLAG_IN_ALLOCA - 1; ++static const uint64_t ALLOCA_ALIGNMENT_MASK = 0x1f; + static const unsigned int SHADER_DESCRIPTOR_TYPE_COUNT = 4; + static const size_t MAX_IR_INSTRUCTIONS_PER_DXIL_INSTRUCTION = 11; + +@@ -103,6 +103,7 @@ enum bitcode_constant_code + CST_CODE_INTEGER = 4, + CST_CODE_FLOAT = 6, + CST_CODE_STRING = 8, ++ CST_CODE_CE_CAST = 11, + CST_CODE_CE_GEP = 12, + CST_CODE_CE_INBOUNDS_GEP = 20, + CST_CODE_DATA = 22, +@@ -413,6 +414,7 @@ enum dx_intrinsic_opcode + DX_DERIV_COARSEY = 84, + DX_DERIV_FINEX = 85, + DX_DERIV_FINEY = 86, ++ DX_COVERAGE = 91, + DX_THREAD_ID = 93, + DX_GROUP_ID = 94, + DX_THREAD_ID_IN_GROUP = 95, +@@ -425,6 +427,11 @@ enum dx_intrinsic_opcode + DX_STORE_PATCH_CONSTANT = 106, + DX_OUTPUT_CONTROL_POINT_ID = 107, + DX_PRIMITIVE_ID = 108, ++ DX_WAVE_GET_LANE_INDEX = 111, ++ DX_WAVE_GET_LANE_COUNT = 112, ++ DX_WAVE_ANY_TRUE = 113, ++ DX_WAVE_ALL_TRUE = 114, ++ DX_WAVE_ACTIVE_ALL_EQUAL = 115, + DX_LEGACY_F32TOF16 = 130, + DX_LEGACY_F16TOF32 = 131, + DX_RAW_BUFFER_LOAD = 139, +@@ -606,6 +613,7 @@ struct sm6_value + enum sm6_value_type value_type; + unsigned int structure_stride; + bool is_undefined; ++ bool is_back_ref; + union + { + struct sm6_function_data function; +@@ -2216,6 +2224,11 @@ static bool sm6_value_is_ssa(const struct sm6_value *value) + return sm6_value_is_register(value) && register_is_ssa(&value->u.reg); + } + ++static bool sm6_value_is_numeric_array(const struct sm6_value *value) ++{ ++ return sm6_value_is_register(value) && register_is_numeric_array(&value->u.reg); ++} ++ + static inline unsigned int sm6_value_get_constant_uint(const struct sm6_value *value) + { + if (!sm6_value_is_constant(value)) +@@ -2658,6 +2671,18 @@ static bool sm6_value_validate_is_pointer(const struct sm6_value *value, struct + return true; + } + ++static bool sm6_value_validate_is_backward_ref(const struct sm6_value *value, struct sm6_parser *sm6) ++{ ++ if (!value->is_back_ref) ++ { ++ FIXME("Forward-referenced pointers are not supported.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Forward-referenced pointer declarations are not supported."); ++ return false; ++ } ++ return true; ++} ++ + static bool sm6_value_validate_is_numeric(const struct sm6_value *value, struct sm6_parser *sm6) + { + if (!sm6_type_is_numeric(value->type)) +@@ -3086,15 +3111,16 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c + static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const struct dxil_block *block) + { + enum vkd3d_shader_register_type reg_type = VKD3DSPR_INVALID; +- const struct sm6_type *type, *elem_type; ++ const struct sm6_type *type, *elem_type, *ptr_type; ++ size_t i, base_value_idx, value_idx; + enum vkd3d_data_type reg_data_type; + const struct dxil_record *record; ++ const struct sm6_value *src; + enum vkd3d_result ret; + struct sm6_value *dst; +- size_t i, value_idx; + uint64_t value; + +- for (i = 0, type = NULL; i < block->record_count; ++i) ++ for (i = 0, type = NULL, base_value_idx = sm6->value_count; i < block->record_count; ++i) + { + sm6->p.location.column = i; + record = block->records[i]; +@@ -3135,6 +3161,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const + dst = sm6_parser_get_current_value(sm6); + dst->type = type; + dst->value_type = VALUE_TYPE_REG; ++ dst->is_back_ref = true; + vsir_register_init(&dst->u.reg, reg_type, reg_data_type, 0); + + switch (record->code) +@@ -3209,6 +3236,48 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const + return ret; + break; + ++ case CST_CODE_CE_CAST: ++ if (!dxil_record_validate_operand_count(record, 3, 3, sm6)) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if ((value = record->operands[0]) != CAST_BITCAST) ++ { ++ WARN("Unhandled constexpr cast op %"PRIu64".\n", value); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Constexpr cast op %"PRIu64" is unhandled.", value); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ ptr_type = sm6_parser_get_type(sm6, record->operands[1]); ++ if (!sm6_type_is_pointer(ptr_type)) ++ { ++ WARN("Constexpr cast at constant idx %zu is not a pointer.\n", value_idx); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Constexpr cast source operand is not a pointer."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if ((value = record->operands[2]) >= sm6->cur_max_value) ++ { ++ WARN("Invalid value index %"PRIu64".\n", value); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid value index %"PRIu64".", value); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ else if (value == value_idx) ++ { ++ WARN("Invalid value self-reference at %"PRIu64".\n", value); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Invalid value self-reference for a constexpr cast."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ /* Resolve later in case forward refs exist. */ ++ dst->type = type; ++ dst->u.reg.type = VKD3DSPR_COUNT; ++ dst->u.reg.idx[0].offset = value; ++ break; ++ + case CST_CODE_UNDEF: + dxil_record_validate_operand_max_count(record, 0, sm6); + dst->u.reg.type = VKD3DSPR_UNDEF; +@@ -3234,6 +3303,29 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const + ++sm6->value_count; + } + ++ /* Resolve cast forward refs. */ ++ for (i = base_value_idx; i < sm6->value_count; ++i) ++ { ++ dst = &sm6->values[i]; ++ if (dst->u.reg.type != VKD3DSPR_COUNT) ++ continue; ++ ++ type = dst->type; ++ ++ src = &sm6->values[dst->u.reg.idx[0].offset]; ++ if (!sm6_value_is_numeric_array(src)) ++ { ++ WARN("Value is not an array.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Constexpr cast source value is not a global array element."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ *dst = *src; ++ dst->type = type; ++ dst->u.reg.data_type = vkd3d_data_type_from_sm6_type(type->u.pointer.type); ++ } ++ + return VKD3D_OK; + } + +@@ -3462,6 +3554,7 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ + dst = sm6_parser_get_current_value(sm6); + dst->type = type; + dst->value_type = VALUE_TYPE_REG; ++ dst->is_back_ref = true; + + if (is_constant && !init) + { +@@ -3946,7 +4039,8 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ + uint64_t code; + + if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) +- || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) ++ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6) ++ || !sm6_value_validate_is_backward_ref(ptr, sm6)) + return; + + if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) +@@ -4421,6 +4515,12 @@ static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) + return VKD3DSIH_F32TOF16; + case DX_LEGACY_F16TOF32: + return VKD3DSIH_F16TOF32; ++ case DX_WAVE_ACTIVE_ALL_EQUAL: ++ return VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL; ++ case DX_WAVE_ALL_TRUE: ++ return VKD3DSIH_WAVE_ALL_TRUE; ++ case DX_WAVE_ANY_TRUE: ++ return VKD3DSIH_WAVE_ANY_TRUE; + default: + vkd3d_unreachable(); + } +@@ -4729,6 +4829,12 @@ static void sm6_parser_emit_dx_input_register_mov(struct sm6_parser *sm6, + instruction_dst_param_init_ssa_scalar(ins, sm6); + } + ++static void sm6_parser_emit_dx_coverage(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_COVERAGE, VKD3D_DATA_UINT); ++} ++ + static const struct sm6_descriptor_info *sm6_parser_get_descriptor(struct sm6_parser *sm6, + enum vkd3d_shader_descriptor_type type, unsigned int id, const struct sm6_value *address) + { +@@ -5804,6 +5910,26 @@ static void sm6_parser_emit_dx_texture_store(struct sm6_parser *sm6, enum dx_int + dst_param_init_with_mask(dst_param, write_mask); + } + ++static void sm6_parser_emit_dx_wave_builtin(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ enum vkd3d_shader_register_type type; ++ ++ switch (op) ++ { ++ case DX_WAVE_GET_LANE_COUNT: ++ type = VKD3DSPR_WAVELANECOUNT; ++ break; ++ case DX_WAVE_GET_LANE_INDEX: ++ type = VKD3DSPR_WAVELANEINDEX; ++ break; ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ sm6_parser_emit_dx_input_register_mov(sm6, state->ins, type, VKD3D_DATA_UINT); ++} ++ + struct sm6_dx_opcode_info + { + const char *ret_type; +@@ -5820,6 +5946,7 @@ struct sm6_dx_opcode_info + C -> constant or undefined int8/16/32 + i -> int32 + m -> int16/32/64 ++ n -> any numeric + f -> float + d -> double + e -> half/float +@@ -5847,6 +5974,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_CBUFFER_LOAD_LEGACY ] = {"o", "Hi", sm6_parser_emit_dx_cbuffer_load}, + [DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos}, + [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, ++ [DX_COVERAGE ] = {"i", "", sm6_parser_emit_dx_coverage}, + [DX_CREATE_HANDLE ] = {"H", "ccib", sm6_parser_emit_dx_create_handle}, + [DX_DERIV_COARSEX ] = {"e", "R", sm6_parser_emit_dx_unary}, + [DX_DERIV_COARSEY ] = {"e", "R", sm6_parser_emit_dx_unary}, +@@ -5922,6 +6050,11 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_UMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, + [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, + [DX_UMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, ++ [DX_WAVE_ACTIVE_ALL_EQUAL ] = {"1", "n", sm6_parser_emit_dx_unary}, ++ [DX_WAVE_ALL_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, ++ [DX_WAVE_ANY_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, ++ [DX_WAVE_GET_LANE_COUNT ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, ++ [DX_WAVE_GET_LANE_INDEX ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, + }; + + static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_value *value, char info_type, +@@ -5953,6 +6086,8 @@ static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struc + return sm6_type_is_i32(type); + case 'm': + return sm6_type_is_i16_i32_i64(type); ++ case 'n': ++ return sm6_type_is_numeric(type); + case 'f': + return sm6_type_is_float(type); + case 'd': +@@ -6446,7 +6581,8 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re + uint64_t code; + + if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) +- || !sm6_value_validate_is_pointer_to_i32(ptr, sm6)) ++ || !sm6_value_validate_is_pointer_to_i32(ptr, sm6) ++ || !sm6_value_validate_is_backward_ref(ptr, sm6)) + return; + + if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) +@@ -6687,6 +6823,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor + return; + if (!sm6_value_validate_is_register(ptr, sm6) + || !sm6_value_validate_is_pointer(ptr, sm6) ++ || !sm6_value_validate_is_backward_ref(ptr, sm6) + || !dxil_record_validate_operand_count(record, i + 2, i + 3, sm6)) + return; + +@@ -6870,7 +7007,8 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco + + if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) + || !sm6_value_validate_is_register(ptr, sm6) +- || !sm6_value_validate_is_pointer(ptr, sm6)) ++ || !sm6_value_validate_is_pointer(ptr, sm6) ++ || !sm6_value_validate_is_backward_ref(ptr, sm6)) + { + return; + } +@@ -7478,6 +7616,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + fwd_type = dst->type; + dst->type = NULL; + dst->value_type = VALUE_TYPE_REG; ++ dst->is_back_ref = true; + is_terminator = false; + + record = block->records[i]; +@@ -8081,7 +8220,9 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = + { + [SEMANTIC_KIND_ARBITRARY] = VKD3D_SHADER_SV_NONE, + [SEMANTIC_KIND_VERTEXID] = VKD3D_SHADER_SV_VERTEX_ID, ++ [SEMANTIC_KIND_INSTANCEID] = VKD3D_SHADER_SV_INSTANCE_ID, + [SEMANTIC_KIND_POSITION] = VKD3D_SHADER_SV_POSITION, ++ [SEMANTIC_KIND_PRIMITIVEID] = VKD3D_SHADER_SV_PRIMITIVE_ID, + [SEMANTIC_KIND_ISFRONTFACE] = VKD3D_SHADER_SV_IS_FRONT_FACE, + [SEMANTIC_KIND_TARGET] = VKD3D_SHADER_SV_TARGET, + [SEMANTIC_KIND_DEPTH] = VKD3D_SHADER_SV_DEPTH, +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 4fc1493bdce..0b48b17d21c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -3517,6 +3517,7 @@ static int compare_function_rb(const void *key, const struct rb_entry *entry) + + static void declare_predefined_types(struct hlsl_ctx *ctx) + { ++ struct vkd3d_string_buffer *name; + unsigned int x, y, bt, i, v; + struct hlsl_type *type; + +@@ -3529,7 +3530,6 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + "uint", + "bool", + }; +- char name[15]; + + static const char *const variants_float[] = {"min10float", "min16float"}; + static const char *const variants_int[] = {"min12int", "min16int"}; +@@ -3573,28 +3573,34 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + {"technique11", 11}, + }; + ++ if (!(name = hlsl_get_string_buffer(ctx))) ++ return; ++ + for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) + { + for (y = 1; y <= 4; ++y) + { + for (x = 1; x <= 4; ++x) + { +- sprintf(name, "%s%ux%u", names[bt], y, x); +- type = hlsl_new_type(ctx, name, HLSL_CLASS_MATRIX, bt, x, y); ++ vkd3d_string_buffer_clear(name); ++ vkd3d_string_buffer_printf(name, "%s%ux%u", names[bt], y, x); ++ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_MATRIX, bt, x, y); + hlsl_scope_add_type(ctx->globals, type); + ctx->builtin_types.matrix[bt][x - 1][y - 1] = type; + + if (y == 1) + { +- sprintf(name, "%s%u", names[bt], x); +- type = hlsl_new_type(ctx, name, HLSL_CLASS_VECTOR, bt, x, y); ++ vkd3d_string_buffer_clear(name); ++ vkd3d_string_buffer_printf(name, "%s%u", names[bt], x); ++ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_VECTOR, bt, x, y); + hlsl_scope_add_type(ctx->globals, type); + ctx->builtin_types.vector[bt][x - 1] = type; + + if (x == 1) + { +- sprintf(name, "%s", names[bt]); +- type = hlsl_new_type(ctx, name, HLSL_CLASS_SCALAR, bt, x, y); ++ vkd3d_string_buffer_clear(name); ++ vkd3d_string_buffer_printf(name, "%s", names[bt]); ++ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_SCALAR, bt, x, y); + hlsl_scope_add_type(ctx->globals, type); + ctx->builtin_types.scalar[bt] = type; + } +@@ -3637,22 +3643,25 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + { + for (x = 1; x <= 4; ++x) + { +- sprintf(name, "%s%ux%u", variants[v], y, x); +- type = hlsl_new_type(ctx, name, HLSL_CLASS_MATRIX, bt, x, y); ++ vkd3d_string_buffer_clear(name); ++ vkd3d_string_buffer_printf(name, "%s%ux%u", variants[v], y, x); ++ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_MATRIX, bt, x, y); + type->is_minimum_precision = 1; + hlsl_scope_add_type(ctx->globals, type); + + if (y == 1) + { +- sprintf(name, "%s%u", variants[v], x); +- type = hlsl_new_type(ctx, name, HLSL_CLASS_VECTOR, bt, x, y); ++ vkd3d_string_buffer_clear(name); ++ vkd3d_string_buffer_printf(name, "%s%u", variants[v], x); ++ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_VECTOR, bt, x, y); + type->is_minimum_precision = 1; + hlsl_scope_add_type(ctx->globals, type); + + if (x == 1) + { +- sprintf(name, "%s", variants[v]); +- type = hlsl_new_type(ctx, name, HLSL_CLASS_SCALAR, bt, x, y); ++ vkd3d_string_buffer_clear(name); ++ vkd3d_string_buffer_printf(name, "%s", variants[v]); ++ type = hlsl_new_type(ctx, name->buffer, HLSL_CLASS_SCALAR, bt, x, y); + type->is_minimum_precision = 1; + hlsl_scope_add_type(ctx->globals, type); + } +@@ -3690,6 +3699,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + type->e.version = technique_types[i].version; + hlsl_scope_add_type(ctx->globals, type); + } ++ ++ hlsl_release_string_buffer(ctx, name); + } + + static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compile_info *compile_info, +@@ -3965,7 +3976,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d + } + if (ret >= 0) + { +- ret = vkd3d_shader_parser_compile(parser, &info, out, message_context); ++ ret = vsir_program_compile(&parser->program, parser->config_flags, &info, out, message_context); + vkd3d_shader_parser_destroy(parser); + } + vkd3d_shader_free_shader_code(&info.source); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 1e5f0805152..c3a4c6bd291 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -1422,6 +1422,7 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere + + bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); + bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); ++bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); + bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); + bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), + struct hlsl_block *block, void *context); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index a6d6b336b40..94acb70fff9 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -5427,6 +5427,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + do + { + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); + progress |= hlsl_copy_propagation_execute(ctx, body); + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +index 4cea98e9286..51f2f9cc050 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +@@ -1396,6 +1396,136 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + return success; + } + ++static bool constant_is_zero(struct hlsl_ir_constant *const_arg) ++{ ++ struct hlsl_type *data_type = const_arg->node.data_type; ++ unsigned int k; ++ ++ for (k = 0; k < data_type->dimx; ++k) ++ { ++ switch (data_type->base_type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ if (const_arg->value.u[k].f != 0.0f) ++ return false; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ if (const_arg->value.u[k].d != 0.0) ++ return false; ++ break; ++ ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_BOOL: ++ if (const_arg->value.u[k].u != 0) ++ return false; ++ break; ++ ++ default: ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool constant_is_one(struct hlsl_ir_constant *const_arg) ++{ ++ struct hlsl_type *data_type = const_arg->node.data_type; ++ unsigned int k; ++ ++ for (k = 0; k < data_type->dimx; ++k) ++ { ++ switch (data_type->base_type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ if (const_arg->value.u[k].f != 1.0f) ++ return false; ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ if (const_arg->value.u[k].d != 1.0) ++ return false; ++ break; ++ ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_BOOL: ++ if (const_arg->value.u[k].u != 1) ++ return false; ++ break; ++ ++ default: ++ return false; ++ } ++ } ++ return true; ++} ++ ++bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_constant *const_arg = NULL; ++ struct hlsl_ir_node *mut_arg = NULL; ++ struct hlsl_ir_node *res_node; ++ struct hlsl_ir_expr *expr; ++ unsigned int i; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ ++ if (instr->data_type->class > HLSL_CLASS_VECTOR) ++ return false; ++ ++ /* Verify that the expression has two operands. */ ++ for (i = 0; i < ARRAY_SIZE(expr->operands); ++i) ++ { ++ if (!!expr->operands[i].node != (i < 2)) ++ return false; ++ } ++ ++ if (expr->operands[0].node->type == HLSL_IR_CONSTANT) ++ { ++ const_arg = hlsl_ir_constant(expr->operands[0].node); ++ mut_arg = expr->operands[1].node; ++ } ++ else if (expr->operands[1].node->type == HLSL_IR_CONSTANT) ++ { ++ mut_arg = expr->operands[0].node; ++ const_arg = hlsl_ir_constant(expr->operands[1].node); ++ } ++ else ++ { ++ return false; ++ } ++ ++ res_node = NULL; ++ switch (expr->op) ++ { ++ case HLSL_OP2_ADD: ++ if (constant_is_zero(const_arg)) ++ res_node = mut_arg; ++ break; ++ ++ case HLSL_OP2_MUL: ++ if (constant_is_one(const_arg)) ++ res_node = mut_arg; ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (res_node) ++ { ++ hlsl_replace_node(&expr->node, res_node); ++ return true; ++ } ++ return false; ++} ++ + bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + struct hlsl_constant_value value; +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index c4e712b8471..dc9e8c06a5e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -361,6 +361,7 @@ struct vkd3d_spirv_builder + uint32_t type_sampler_id; + uint32_t type_bool_id; + uint32_t type_void_id; ++ uint32_t scope_subgroup_id; + + struct vkd3d_spirv_stream debug_stream; /* debug instructions */ + struct vkd3d_spirv_stream annotation_stream; /* decoration instructions */ +@@ -1741,6 +1742,16 @@ static void vkd3d_spirv_build_op_memory_barrier(struct vkd3d_spirv_builder *buil + SpvOpMemoryBarrier, memory_id, memory_semantics_id); + } + ++static uint32_t vkd3d_spirv_build_op_scope_subgroup(struct vkd3d_spirv_builder *builder) ++{ ++ return vkd3d_spirv_get_op_constant(builder, vkd3d_spirv_get_op_type_int(builder, 32, 0), SpvScopeSubgroup); ++} ++ ++static uint32_t vkd3d_spirv_get_op_scope_subgroup(struct vkd3d_spirv_builder *builder) ++{ ++ return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); ++} ++ + static uint32_t vkd3d_spirv_build_op_glsl_std450_tr1(struct vkd3d_spirv_builder *builder, + enum GLSLstd450 op, uint32_t result_type, uint32_t operand) + { +@@ -2453,8 +2464,7 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) + static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, +- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location, +- uint64_t config_flags) ++ struct vkd3d_shader_message_context *message_context, uint64_t config_flags) + { + const struct shader_signature *patch_constant_signature = &program->patch_constant_signature; + const struct shader_signature *output_signature = &program->output_signature; +@@ -2470,7 +2480,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p + + memset(compiler, 0, sizeof(*compiler)); + compiler->message_context = message_context; +- compiler->location = *location; ++ compiler->location.source_name = compile_info->source_name; + compiler->config_flags = config_flags; + + if ((target_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO))) +@@ -2632,6 +2642,11 @@ static bool spirv_compiler_is_opengl_target(const struct spirv_compiler *compile + return spirv_compiler_get_target_environment(compiler) == VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5; + } + ++static bool spirv_compiler_is_spirv_min_1_3_target(const struct spirv_compiler *compiler) ++{ ++ return spirv_compiler_get_target_environment(compiler) == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1; ++} ++ + static bool spirv_compiler_is_target_extension_supported(const struct spirv_compiler *compiler, + enum vkd3d_shader_spirv_extension extension) + { +@@ -3150,6 +3165,12 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s + case VKD3DSPR_OUTSTENCILREF: + snprintf(buffer, buffer_size, "oStencilRef"); + break; ++ case VKD3DSPR_WAVELANECOUNT: ++ snprintf(buffer, buffer_size, "vWaveLaneCount"); ++ break; ++ case VKD3DSPR_WAVELANEINDEX: ++ snprintf(buffer, buffer_size, "vWaveLaneIndex"); ++ break; + default: + FIXME("Unhandled register %#x.\n", reg->type); + snprintf(buffer, buffer_size, "unrecognized_%#x", reg->type); +@@ -4535,6 +4556,10 @@ static void spirv_compiler_decorate_builtin(struct spirv_compiler *compiler, + case SpvBuiltInCullDistance: + vkd3d_spirv_enable_capability(builder, SpvCapabilityCullDistance); + break; ++ case SpvBuiltInSubgroupSize: ++ case SpvBuiltInSubgroupLocalInvocationId: ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniform); ++ break; + default: + break; + } +@@ -4724,6 +4749,9 @@ vkd3d_register_builtins[] = + {VKD3DSPR_DEPTHOUTLE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + + {VKD3DSPR_OUTSTENCILREF, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, ++ ++ {VKD3DSPR_WAVELANECOUNT, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, ++ {VKD3DSPR_WAVELANEINDEX, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}}, + }; + + static void spirv_compiler_emit_register_execution_mode(struct spirv_compiler *compiler, +@@ -5772,6 +5800,23 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler + flags &= ~VKD3DSGF_ENABLE_INT64; + } + ++ if (flags & VKD3DSGF_ENABLE_WAVE_INTRINSICS) ++ { ++ if (!(compiler->features & VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS)) ++ { ++ WARN("Unsupported wave ops.\n"); ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, ++ "The target environment does not support wave ops."); ++ } ++ else if (!spirv_compiler_is_spirv_min_1_3_target(compiler)) ++ { ++ WARN("Wave ops enabled but environment does not support SPIR-V 1.3 or greater.\n"); ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, ++ "The target environment uses wave ops but does not support SPIR-V 1.3 or greater."); ++ } ++ flags &= ~VKD3DSGF_ENABLE_WAVE_INTRINSICS; ++ } ++ + if (flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS)) + FIXME("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); + else +@@ -9713,6 +9758,41 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, + vkd3d_spirv_build_op_end_primitive(builder); + } + ++static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode handler_idx) ++{ ++ switch (handler_idx) ++ { ++ case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: ++ return SpvOpGroupNonUniformAllEqual; ++ case VKD3DSIH_WAVE_ALL_TRUE: ++ return SpvOpGroupNonUniformAll; ++ case VKD3DSIH_WAVE_ANY_TRUE: ++ return SpvOpGroupNonUniformAny; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, val_id; ++ SpvOp op; ++ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformVote); ++ ++ op = map_wave_bool_op(instruction->handler_idx); ++ type_id = vkd3d_spirv_get_op_type_bool(builder); ++ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); ++ val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, ++ type_id, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ + /* This function is called after declarations are processed. */ + static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) + { +@@ -9732,6 +9812,8 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + { + int ret = VKD3D_OK; + ++ compiler->location = instruction->location; ++ + switch (instruction->handler_idx) + { + case VKD3DSIH_DCL_GLOBAL_FLAGS: +@@ -10055,6 +10137,11 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_CUT_STREAM: + spirv_compiler_emit_cut_stream(compiler, instruction); + break; ++ case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: ++ case VKD3DSIH_WAVE_ALL_TRUE: ++ case VKD3DSIH_WAVE_ANY_TRUE: ++ spirv_compiler_emit_wave_bool_op(compiler, instruction); ++ break; + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: +@@ -10151,15 +10238,13 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c + } + } + +-static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, +- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_parser *parser, +- struct vkd3d_shader_code *spirv) ++static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct vsir_program *program, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *spirv) + { + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + struct vkd3d_shader_instruction_array instructions; +- struct vsir_program *program = &parser->program; + enum vkd3d_shader_spirv_environment environment; + enum vkd3d_result result = VKD3D_OK; + unsigned int i; +@@ -10175,9 +10260,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + + spirv_compiler_emit_descriptor_declarations(compiler); + +- compiler->location.column = 0; +- compiler->location.line = 1; +- + if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + +@@ -10202,7 +10284,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + + for (i = 0; i < instructions.count && result >= 0; ++i) + { +- compiler->location.line = i + 1; + result = spirv_compiler_handle_instruction(compiler, &instructions.elements[i]); + } + +@@ -10249,7 +10330,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + if (!vkd3d_spirv_compile_module(builder, spirv, spirv_compiler_get_entry_point_name(compiler), environment)) + return VKD3D_ERROR; + +- if (TRACE_ON() || parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) ++ if (TRACE_ON() || compiler->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION) + { + struct vkd3d_string_buffer buffer; + +@@ -10287,7 +10368,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + return VKD3D_OK; + } + +-int spirv_compile(struct vkd3d_shader_parser *parser, ++int spirv_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +@@ -10295,14 +10376,14 @@ int spirv_compile(struct vkd3d_shader_parser *parser, + struct spirv_compiler *spirv_compiler; + int ret; + +- if (!(spirv_compiler = spirv_compiler_create(&parser->program, compile_info, +- scan_descriptor_info, message_context, &parser->location, parser->config_flags))) ++ if (!(spirv_compiler = spirv_compiler_create(program, compile_info, ++ scan_descriptor_info, message_context, config_flags))) + { + ERR("Failed to create SPIR-V compiler.\n"); + return VKD3D_ERROR; + } + +- ret = spirv_compiler_generate_spirv(spirv_compiler, compile_info, parser, out); ++ ret = spirv_compiler_generate_spirv(spirv_compiler, program, compile_info, out); + + spirv_compiler_destroy(spirv_compiler); + return ret; +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index cb37efb53f7..29b2c1482a9 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -1395,9 +1395,9 @@ static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_des + vkd3d_free(scan_descriptor_info->descriptors); + } + +-static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, ++static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, +- struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) ++ struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) + { + struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; + struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; +@@ -1428,27 +1428,27 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + descriptor_info1 = &local_descriptor_info1; + } + +- vkd3d_shader_scan_context_init(&context, &parser->program.shader_version, compile_info, ++ vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, + descriptor_info1, combined_sampler_info, message_context); + + if (TRACE_ON()) +- vkd3d_shader_trace(&parser->program); ++ vkd3d_shader_trace(program); + +- for (i = 0; i < parser->program.instructions.count; ++i) ++ for (i = 0; i < program->instructions.count; ++i) + { +- instruction = &parser->program.instructions.elements[i]; ++ instruction = &program->instructions.elements[i]; + if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) + break; + } + +- for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) ++ for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i) + { +- unsigned int size = parser->shader_desc.flat_constant_count[i].external; + struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; + struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; ++ unsigned int size = program->flat_constant_count[i]; + struct vkd3d_shader_descriptor_info1 *d; + +- if (parser->shader_desc.flat_constant_count[i].external) ++ if (size) + { + if ((d = vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ®, + &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT))) +@@ -1458,11 +1458,11 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + + if (!ret && signature_info) + { +- if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->program.input_signature) ++ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &program->input_signature) + || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, +- &parser->program.output_signature) ++ &program->output_signature) + || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, +- &parser->program.patch_constant_signature)) ++ &program->patch_constant_signature)) + { + ret = VKD3D_ERROR_OUT_OF_MEMORY; + } +@@ -1544,7 +1544,7 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + } + else + { +- ret = scan_with_parser(compile_info, &message_context, NULL, parser); ++ ret = vsir_program_scan(&parser->program, compile_info, &message_context, NULL); + vkd3d_shader_parser_destroy(parser); + } + } +@@ -1556,12 +1556,11 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + return ret; + } + +-int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, +- const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) ++int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, ++ struct vkd3d_shader_message_context *message_context) + { + struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; +- struct vsir_program *program = &parser->program; + struct vkd3d_shader_compile_info scan_info; + int ret; + +@@ -1574,17 +1573,18 @@ int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + break; + + case VKD3D_SHADER_TARGET_GLSL: +- if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) ++ if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) + return ret; +- ret = glsl_compile(program, parser->config_flags, compile_info, out, message_context); ++ ret = glsl_compile(program, config_flags, compile_info, out, message_context); + vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + break; + + case VKD3D_SHADER_TARGET_SPIRV_BINARY: + case VKD3D_SHADER_TARGET_SPIRV_TEXT: +- if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) ++ if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) + return ret; +- ret = spirv_compile(parser, &scan_descriptor_info, compile_info, out, message_context); ++ ret = spirv_compile(program, config_flags, &scan_descriptor_info, ++ compile_info, out, message_context); + vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + break; + +@@ -1665,7 +1665,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + } + else + { +- ret = vkd3d_shader_parser_compile(parser, compile_info, out, &message_context); ++ ret = vsir_program_compile(&parser->program, parser->config_flags, compile_info, out, &message_context); + vkd3d_shader_parser_destroy(parser); + } + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 4434e6e98f2..07b5818cba9 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -527,6 +527,9 @@ enum vkd3d_shader_opcode + VKD3DSIH_UTOD, + VKD3DSIH_UTOF, + VKD3DSIH_UTOU, ++ VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL, ++ VKD3DSIH_WAVE_ALL_TRUE, ++ VKD3DSIH_WAVE_ANY_TRUE, + VKD3DSIH_XOR, + + VKD3DSIH_INVALID, +@@ -590,6 +593,8 @@ enum vkd3d_shader_register_type + VKD3DSPR_OUTSTENCILREF, + VKD3DSPR_UNDEF, + VKD3DSPR_SSA, ++ VKD3DSPR_WAVELANECOUNT, ++ VKD3DSPR_WAVELANEINDEX, + + VKD3DSPR_COUNT, + +@@ -1061,14 +1066,6 @@ struct dxbc_shader_desc + struct shader_signature patch_constant_signature; + }; + +-struct vkd3d_shader_desc +-{ +- struct +- { +- uint32_t used, external; +- } flat_constant_count[3]; +-}; +- + struct vkd3d_shader_register_semantic + { + struct vkd3d_shader_dst_param reg; +@@ -1249,6 +1246,12 @@ static inline bool register_is_scalar_constant_zero(const struct vkd3d_shader_re + && (data_type_is_64_bit(reg->data_type) ? !reg->u.immconst_u64[0] : !reg->u.immconst_u32[0]); + } + ++static inline bool register_is_numeric_array(const struct vkd3d_shader_register *reg) ++{ ++ return (reg->type == VKD3DSPR_IMMCONSTBUFFER || reg->type == VKD3DSPR_IDXTEMP ++ || reg->type == VKD3DSPR_GROUPSHAREDMEM); ++} ++ + static inline bool vsir_register_is_label(const struct vkd3d_shader_register *reg) + { + return reg->type == VKD3DSPR_LABEL; +@@ -1332,6 +1335,7 @@ struct vsir_program + struct shader_signature patch_constant_signature; + + unsigned int input_control_point_count, output_control_point_count; ++ unsigned int flat_constant_count[3]; + unsigned int block_count; + unsigned int temp_count; + unsigned int ssa_count; +@@ -1341,8 +1345,11 @@ struct vsir_program + size_t block_name_count; + }; + +-bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); + void vsir_program_cleanup(struct vsir_program *program); ++int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, ++ struct vkd3d_shader_message_context *message_context); ++bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); + enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); + enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, +@@ -1366,7 +1373,6 @@ struct vkd3d_shader_parser + struct vkd3d_shader_location location; + bool failed; + +- struct vkd3d_shader_desc shader_desc; + const struct vkd3d_shader_parser_ops *ops; + struct vsir_program program; + +@@ -1378,9 +1384,6 @@ struct vkd3d_shader_parser_ops + void (*parser_destroy)(struct vkd3d_shader_parser *parser); + }; + +-int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, +- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, +- struct vkd3d_shader_message_context *message_context); + void vkd3d_shader_parser_error(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_error error, const char *format, ...) VKD3D_PRINTF_FUNC(3, 4); + bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, +@@ -1552,7 +1555,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, + + #define SPIRV_MAX_SRC_COUNT 6 + +-int spirv_compile(struct vkd3d_shader_parser *parser, ++int spirv_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); +diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c +index 4a69ff530da..95366d3441b 100644 +--- a/libs/vkd3d/libs/vkd3d/command.c ++++ b/libs/vkd3d/libs/vkd3d/command.c +@@ -2052,20 +2052,15 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, + * state when GPU finishes execution of a command list. */ + if (is_swapchain_image) + { +- if (resource->present_state == D3D12_RESOURCE_STATE_PRESENT) +- { +- *access_mask = VK_ACCESS_MEMORY_READ_BIT; +- *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; +- if (image_layout) +- *image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; +- return true; +- } +- else if (resource->present_state != D3D12_RESOURCE_STATE_COMMON) +- { +- vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, ++ if (resource->present_state != D3D12_RESOURCE_STATE_PRESENT) ++ return vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, + resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); +- return true; +- } ++ ++ *access_mask = VK_ACCESS_MEMORY_READ_BIT; ++ *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; ++ if (image_layout) ++ *image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; ++ return true; + } + + *access_mask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT; +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index cb2b6ad0364..3f3332dd3e3 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -788,6 +788,11 @@ VkInstance vkd3d_instance_get_vk_instance(struct vkd3d_instance *instance) + return instance->vk_instance; + } + ++static bool d3d12_device_environment_is_vulkan_min_1_1(struct d3d12_device *device) ++{ ++ return device->environment == VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1; ++} ++ + struct vkd3d_physical_device_info + { + /* properties */ +@@ -796,6 +801,7 @@ struct vkd3d_physical_device_info + VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; + VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties; + VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties; ++ VkPhysicalDeviceSubgroupProperties subgroup_properties; + + VkPhysicalDeviceProperties2KHR properties2; + +@@ -838,6 +844,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + VkPhysicalDevice4444FormatsFeaturesEXT *formats4444_features; + VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb_features; + struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; ++ VkPhysicalDeviceSubgroupProperties *subgroup_properties; + + memset(info, 0, sizeof(*info)); + conditional_rendering_features = &info->conditional_rendering_features; +@@ -857,6 +864,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + formats4444_features = &info->formats4444_features; + xfb_features = &info->xfb_features; + xfb_properties = &info->xfb_properties; ++ subgroup_properties = &info->subgroup_properties; + + info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + +@@ -902,6 +910,9 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + vk_prepend_struct(&info->properties2, xfb_properties); + vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; + vk_prepend_struct(&info->properties2, vertex_divisor_properties); ++ subgroup_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; ++ if (d3d12_device_environment_is_vulkan_min_1_1(device)) ++ vk_prepend_struct(&info->properties2, subgroup_properties); + + if (vulkan_info->KHR_get_physical_device_properties2) + VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); +@@ -1509,6 +1520,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + struct vkd3d_physical_device_info *physical_device_info, + uint32_t *device_extension_count, bool **user_extension_supported) + { ++ const VkPhysicalDeviceSubgroupProperties *subgroup_properties = &physical_device_info->subgroup_properties; + const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; + VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock; + const struct vkd3d_optional_device_extensions_info *optional_extensions; +@@ -1520,6 +1532,16 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + uint32_t count; + VkResult vr; + ++ /* SHUFFLE is required to implement WaveReadLaneAt with dynamically uniform index before SPIR-V 1.5 / Vulkan 1.2. */ ++ static const VkSubgroupFeatureFlags required_subgroup_features = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT ++ | VK_SUBGROUP_FEATURE_BASIC_BIT ++ | VK_SUBGROUP_FEATURE_BALLOT_BIT ++ | VK_SUBGROUP_FEATURE_SHUFFLE_BIT ++ | VK_SUBGROUP_FEATURE_QUAD_BIT ++ | VK_SUBGROUP_FEATURE_VOTE_BIT; ++ ++ static const VkSubgroupFeatureFlags required_stages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; ++ + *device_extension_count = 0; + + vkd3d_trace_physical_device(physical_device, physical_device_info, vk_procs); +@@ -1583,10 +1605,12 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + device->feature_options.ResourceHeapTier = D3D12_RESOURCE_HEAP_TIER_2; + + /* Shader Model 6 support. */ +- device->feature_options1.WaveOps = FALSE; +- device->feature_options1.WaveLaneCountMin = 0; +- device->feature_options1.WaveLaneCountMax = 0; +- device->feature_options1.TotalLaneCount = 0; ++ device->feature_options1.WaveOps = subgroup_properties->subgroupSize >= 4 ++ && (subgroup_properties->supportedOperations & required_subgroup_features) == required_subgroup_features ++ && (subgroup_properties->supportedStages & required_stages) == required_stages; ++ device->feature_options1.WaveLaneCountMin = subgroup_properties->subgroupSize; ++ device->feature_options1.WaveLaneCountMax = subgroup_properties->subgroupSize; ++ device->feature_options1.TotalLaneCount = 32 * subgroup_properties->subgroupSize; /* approx. */ + device->feature_options1.ExpandedComputeResourceStates = TRUE; + device->feature_options1.Int64ShaderOps = features->shaderInt64; + +@@ -3434,7 +3458,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 + + TRACE("Request shader model %#x.\n", data->HighestShaderModel); + ++#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL ++ data->HighestShaderModel = D3D_SHADER_MODEL_6_0; ++#else + data->HighestShaderModel = D3D_SHADER_MODEL_5_1; ++#endif + + TRACE("Shader model %#x.\n", data->HighestShaderModel); + return S_OK; +diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c +index 6ba29c18004..199d8043ffe 100644 +--- a/libs/vkd3d/libs/vkd3d/state.c ++++ b/libs/vkd3d/libs/vkd3d/state.c +@@ -2159,6 +2159,8 @@ static unsigned int feature_flags_compile_option(const struct d3d12_device *devi + flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_INT64; + if (device->feature_options.DoublePrecisionFloatShaderOps) + flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLOAT64; ++ if (device->feature_options1.WaveOps) ++ flags |= VKD3D_SHADER_COMPILE_OPTION_FEATURE_WAVE_OPS; + + return flags; + } +-- +2.43.0 +