From 05c875caa59f6380fae1bc9fbaff378b2cf70ee4 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 16 May 2024 11:58:46 +1000 Subject: [PATCH] Updated vkd3d-latest patchset --- ...-4a209efb6278586d412ceb0a7cbe21e6769.patch | 2 +- ...-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch | 2 +- ...-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch | 2 +- ...-13e1491941a1af32ddfc1019fa304231fd1.patch | 2 +- ...-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch | 2 +- ...-4b3a948edcb5e83074b63aad25ecf450dca.patch | 2 +- ...-9e57039fce4040c8bfadaa73bf449c00591.patch | 937 ++++++++++++++++++ ...-061dc390367b4c83022d5fe1255f8d38f6b.patch | 153 +++ 8 files changed, 1096 insertions(+), 6 deletions(-) create mode 100644 patches/vkd3d-latest/0007-Updated-vkd3d-to-9e57039fce4040c8bfadaa73bf449c00591.patch create mode 100644 patches/vkd3d-latest/0008-Updated-vkd3d-to-061dc390367b4c83022d5fe1255f8d38f6b.patch diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch index 3f3b6928..152c2f01 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-4a209efb6278586d412ceb0a7cbe21e6769.patch @@ -1,4 +1,4 @@ -From fe4f6075b7b5ec6e9bb86bd41c8f62e547d78e9e Mon Sep 17 00:00:00 2001 +From ba798c39689ed7d2f3952a250825f0c3a0b4cf88 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 7 Mar 2024 10:40:41 +1100 Subject: [PATCH] Updated vkd3d to 4a209efb6278586d412ceb0a7cbe21e6769a7367. 
diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch index be760f2e..9289af36 100644 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6.patch @@ -1,4 +1,4 @@ -From ea3477f55df517065cc716c27e2df7ea68f6760e Mon Sep 17 00:00:00 2001 +From 7365c2f891b2cdfa4b9610b143bdccafd80851ec Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 23 Apr 2024 08:01:19 +1000 Subject: [PATCH] Updated vkd3d to 7b4a1fdfbc192cfd02ffb6cf18c0a86b2f6eaeb4. diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch index 2a67c42a..6cdf5833 100644 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch +++ b/patches/vkd3d-latest/0003-Updated-vkd3d-to-46fca3f9f4a9b47b32e9dfbacda0f3d1953.patch @@ -1,4 +1,4 @@ -From 417a497121a93250fa896b536dd56af55f562cba Mon Sep 17 00:00:00 2001 +From ddfe189d39a3dd3f1c99773c57bb0650e39e8354 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 24 Apr 2024 09:05:20 +1000 Subject: [PATCH] Updated vkd3d to 46fca3f9f4a9b47b32e9dfbacda0f3d19536c02c. 
diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-13e1491941a1af32ddfc1019fa304231fd1.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-13e1491941a1af32ddfc1019fa304231fd1.patch index 3e14a505..f80ec902 100644 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-13e1491941a1af32ddfc1019fa304231fd1.patch +++ b/patches/vkd3d-latest/0004-Updated-vkd3d-to-13e1491941a1af32ddfc1019fa304231fd1.patch @@ -1,4 +1,4 @@ -From 22ce76c9299aa5cd9b6df4244fb7614e322818fa Mon Sep 17 00:00:00 2001 +From e91e957dbee71d7729e7e6fe7aa6c04bf13c360b Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Sun, 28 Apr 2024 09:46:44 +1000 Subject: [PATCH] Updated vkd3d to 13e1491941a1af32ddfc1019fa304231fd121c4d. diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch index c1f02e8e..cc5b3afa 100644 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch +++ b/patches/vkd3d-latest/0005-Updated-vkd3d-to-62a512c4f8c4070f0f4f3ed8e70b6f0bc88.patch @@ -1,4 +1,4 @@ -From 64c8a6f765bc07221a5b9d0b78bb12db0eae898e Mon Sep 17 00:00:00 2001 +From a10a5f6d7f227464e7b594421cda94412460d7dc Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 3 May 2024 07:31:39 +1000 Subject: [PATCH] Updated vkd3d to 62a512c4f8c4070f0f4f3ed8e70b6f0bc885da30. 
diff --git a/patches/vkd3d-latest/0006-Updated-vkd3d-to-4b3a948edcb5e83074b63aad25ecf450dca.patch b/patches/vkd3d-latest/0006-Updated-vkd3d-to-4b3a948edcb5e83074b63aad25ecf450dca.patch index a962258e..3ea4add8 100644 --- a/patches/vkd3d-latest/0006-Updated-vkd3d-to-4b3a948edcb5e83074b63aad25ecf450dca.patch +++ b/patches/vkd3d-latest/0006-Updated-vkd3d-to-4b3a948edcb5e83074b63aad25ecf450dca.patch @@ -1,4 +1,4 @@ -From 9db69085387899436ec12182c76ea25b4c773219 Mon Sep 17 00:00:00 2001 +From 669dcd4183544055c8b512d6a60df7536e82b453 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Mon, 13 May 2024 09:29:08 +1000 Subject: [PATCH] Updated vkd3d to 4b3a948edcb5e83074b63aad25ecf450dcae4130. diff --git a/patches/vkd3d-latest/0007-Updated-vkd3d-to-9e57039fce4040c8bfadaa73bf449c00591.patch b/patches/vkd3d-latest/0007-Updated-vkd3d-to-9e57039fce4040c8bfadaa73bf449c00591.patch new file mode 100644 index 00000000..a2249699 --- /dev/null +++ b/patches/vkd3d-latest/0007-Updated-vkd3d-to-9e57039fce4040c8bfadaa73bf449c00591.patch @@ -0,0 +1,937 @@ +From 9cd79b111e612defc44743beab2de36703bb1786 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Wed, 15 May 2024 08:23:37 +1000 +Subject: [PATCH] Updated vkd3d to 9e57039fce4040c8bfadaa73bf449c005912a83e. 
+ +--- + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 5 + + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 8 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 55 +++++ + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 7 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 3 + + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 193 +++++++++++++----- + libs/vkd3d/libs/vkd3d-shader/ir.c | 9 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 149 +++++++++++++- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 2 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 5 + + libs/vkd3d/libs/vkd3d/device.c | 97 ++++++++- + 11 files changed, 463 insertions(+), 70 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index f2ad39f2f07..9abc2c4db70 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -332,8 +332,10 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_WAVE_ACTIVE_BIT_AND ] = "wave_active_bit_and", + [VKD3DSIH_WAVE_ACTIVE_BIT_OR ] = "wave_active_bit_or", + [VKD3DSIH_WAVE_ACTIVE_BIT_XOR ] = "wave_active_bit_xor", ++ [VKD3DSIH_WAVE_ALL_BIT_COUNT ] = "wave_all_bit_count", + [VKD3DSIH_WAVE_ALL_TRUE ] = "wave_all_true", + [VKD3DSIH_WAVE_ANY_TRUE ] = "wave_any_true", ++ [VKD3DSIH_WAVE_IS_FIRST_LANE ] = "wave_is_first_lane", + [VKD3DSIH_WAVE_OP_ADD ] = "wave_op_add", + [VKD3DSIH_WAVE_OP_IMAX ] = "wave_op_imax", + [VKD3DSIH_WAVE_OP_IMIN ] = "wave_op_imin", +@@ -342,6 +344,9 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_WAVE_OP_MUL ] = "wave_op_mul", + [VKD3DSIH_WAVE_OP_UMAX ] = "wave_op_umax", + [VKD3DSIH_WAVE_OP_UMIN ] = "wave_op_umin", ++ [VKD3DSIH_WAVE_PREFIX_BIT_COUNT ] = "wave_prefix_bit_count", ++ [VKD3DSIH_WAVE_READ_LANE_AT ] = "wave_read_lane_at", ++ [VKD3DSIH_WAVE_READ_LANE_FIRST ] = "wave_read_lane_first", + [VKD3DSIH_XOR ] = "xor", + }; + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 24a95224349..7c7c71e3c9a 100644 +--- 
a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1713,7 +1713,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { +- if (var->semantic.name || !var->regs[r].allocated) ++ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) + continue; + + ++uniform_count; +@@ -1751,14 +1751,14 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + { +- if (var->semantic.name || !var->regs[r].allocated) ++ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) + continue; + + put_u32(buffer, 0); /* name */ + if (r == HLSL_REGSET_NUMERIC) + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); +- put_u32(buffer, var->data_type->reg_size[r] / 4); ++ put_u32(buffer, var->bind_count[r]); + } + else + { +@@ -1780,7 +1780,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + { + size_t var_offset, name_offset; + +- if (var->semantic.name || !var->regs[r].allocated) ++ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) + continue; + + var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 29f736364dc..605e97530c1 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -446,17 +446,22 @@ enum dx_intrinsic_opcode + DX_STORE_PATCH_CONSTANT = 106, + DX_OUTPUT_CONTROL_POINT_ID = 107, + DX_PRIMITIVE_ID = 108, ++ DX_WAVE_IS_FIRST_LANE = 110, + DX_WAVE_GET_LANE_INDEX = 111, + DX_WAVE_GET_LANE_COUNT = 112, + DX_WAVE_ANY_TRUE = 113, + DX_WAVE_ALL_TRUE = 114, + DX_WAVE_ACTIVE_ALL_EQUAL = 115, + DX_WAVE_ACTIVE_BALLOT = 116, ++ DX_WAVE_READ_LANE_AT = 117, ++ DX_WAVE_READ_LANE_FIRST = 118, + DX_WAVE_ACTIVE_OP = 119, + DX_WAVE_ACTIVE_BIT = 120, + DX_WAVE_PREFIX_OP = 
121, + DX_LEGACY_F32TOF16 = 130, + DX_LEGACY_F16TOF32 = 131, ++ DX_WAVE_ALL_BIT_COUNT = 135, ++ DX_WAVE_PREFIX_BIT_COUNT = 136, + DX_RAW_BUFFER_LOAD = 139, + DX_RAW_BUFFER_STORE = 140, + }; +@@ -3816,6 +3821,8 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( + { + switch (sysval_semantic) + { ++ case VKD3D_SHADER_SV_COVERAGE: ++ return VKD3DSPR_COVERAGE; + case VKD3D_SHADER_SV_DEPTH: + return VKD3DSPR_DEPTHOUT; + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: +@@ -4489,6 +4496,25 @@ static bool sm6_parser_emit_coordinate_construct(struct sm6_parser *sm6, const s + return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg); + } + ++static enum vkd3d_shader_opcode sm6_dx_map_void_op(enum dx_intrinsic_opcode op) ++{ ++ switch (op) ++ { ++ case DX_WAVE_IS_FIRST_LANE: ++ return VKD3DSIH_WAVE_IS_FIRST_LANE; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void sm6_parser_emit_dx_void(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ vsir_instruction_init(ins, &sm6->p.location, sm6_dx_map_void_op(op)); ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ + static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) + { + switch (op) +@@ -4555,10 +4581,16 @@ static enum vkd3d_shader_opcode map_dx_unary_op(enum dx_intrinsic_opcode op) + return VKD3DSIH_F16TOF32; + case DX_WAVE_ACTIVE_ALL_EQUAL: + return VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL; ++ case DX_WAVE_ALL_BIT_COUNT: ++ return VKD3DSIH_WAVE_ALL_BIT_COUNT; + case DX_WAVE_ALL_TRUE: + return VKD3DSIH_WAVE_ALL_TRUE; + case DX_WAVE_ANY_TRUE: + return VKD3DSIH_WAVE_ANY_TRUE; ++ case DX_WAVE_PREFIX_BIT_COUNT: ++ return VKD3DSIH_WAVE_PREFIX_BIT_COUNT; ++ case DX_WAVE_READ_LANE_FIRST: ++ return VKD3DSIH_WAVE_READ_LANE_FIRST; + default: + vkd3d_unreachable(); + } +@@ -4594,6 +4626,8 @@ static enum 
vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co + return VKD3DSIH_UMAX; + case DX_UMIN: + return VKD3DSIH_UMIN; ++ case DX_WAVE_READ_LANE_AT: ++ return VKD3DSIH_WAVE_READ_LANE_AT; + default: + vkd3d_unreachable(); + } +@@ -6233,11 +6267,16 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_WAVE_ACTIVE_BALLOT ] = {"V", "1", sm6_parser_emit_dx_wave_active_ballot}, + [DX_WAVE_ACTIVE_BIT ] = {"m", "Rc", sm6_parser_emit_dx_wave_active_bit}, + [DX_WAVE_ACTIVE_OP ] = {"n", "Rcc", sm6_parser_emit_dx_wave_op}, ++ [DX_WAVE_ALL_BIT_COUNT ] = {"i", "1", sm6_parser_emit_dx_unary}, + [DX_WAVE_ALL_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, + [DX_WAVE_ANY_TRUE ] = {"1", "1", sm6_parser_emit_dx_unary}, + [DX_WAVE_GET_LANE_COUNT ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, + [DX_WAVE_GET_LANE_INDEX ] = {"i", "", sm6_parser_emit_dx_wave_builtin}, ++ [DX_WAVE_IS_FIRST_LANE ] = {"1", "", sm6_parser_emit_dx_void}, ++ [DX_WAVE_PREFIX_BIT_COUNT ] = {"i", "1", sm6_parser_emit_dx_unary}, + [DX_WAVE_PREFIX_OP ] = {"n", "Rcc", sm6_parser_emit_dx_wave_op}, ++ [DX_WAVE_READ_LANE_AT ] = {"n", "Ri", sm6_parser_emit_dx_binary}, ++ [DX_WAVE_READ_LANE_FIRST ] = {"n", "R", sm6_parser_emit_dx_unary}, + }; + + static bool sm6_parser_validate_operand_type(struct sm6_parser *sm6, const struct sm6_value *value, char info_type, +@@ -6697,6 +6736,15 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor + + code = record->operands[i++]; + ++ /* dxcompiler occasionally emits bool not-equal-to-false, which is a no-op. Bool comparisons ++ * do not otherwise occur, so deleting these avoids the need for backend support. 
*/ ++ if (sm6_type_is_bool(type_a) && code == ICMP_NE && sm6_value_is_constant_zero(b)) ++ { ++ ins->handler_idx = VKD3DSIH_NOP; ++ *dst = *a; ++ return; ++ } ++ + if ((!is_int && !is_fp) || is_int != (code >= ICMP_EQ)) + { + FIXME("Invalid operation %"PRIu64" on type class %u.\n", code, type_a->class); +@@ -8407,8 +8455,11 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = + [SEMANTIC_KIND_VERTEXID] = VKD3D_SHADER_SV_VERTEX_ID, + [SEMANTIC_KIND_INSTANCEID] = VKD3D_SHADER_SV_INSTANCE_ID, + [SEMANTIC_KIND_POSITION] = VKD3D_SHADER_SV_POSITION, ++ [SEMANTIC_KIND_CLIPDISTANCE] = VKD3D_SHADER_SV_CLIP_DISTANCE, ++ [SEMANTIC_KIND_CULLDISTANCE] = VKD3D_SHADER_SV_CULL_DISTANCE, + [SEMANTIC_KIND_PRIMITIVEID] = VKD3D_SHADER_SV_PRIMITIVE_ID, + [SEMANTIC_KIND_ISFRONTFACE] = VKD3D_SHADER_SV_IS_FRONT_FACE, ++ [SEMANTIC_KIND_COVERAGE] = VKD3D_SHADER_SV_COVERAGE, + [SEMANTIC_KIND_TARGET] = VKD3D_SHADER_SV_TARGET, + [SEMANTIC_KIND_DEPTH] = VKD3D_SHADER_SV_DEPTH, + [SEMANTIC_KIND_DEPTHLESSEQUAL] = VKD3D_SHADER_SV_DEPTH_LESS_EQUAL, +@@ -9358,6 +9409,10 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const + e->used_mask <<= index; + } + ++ /* DXIL reads/writes uint for bool I/O. */ ++ if (e->component_type == VKD3D_SHADER_COMPONENT_BOOL) ++ e->component_type = VKD3D_SHADER_COMPONENT_UINT; ++ + m = element_node->operands[4]; + if (!sm6_metadata_value_is_node(m)) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index a89e43f9bf2..08a017874ae 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -447,9 +447,10 @@ struct hlsl_ir_var + enum hlsl_sampler_dim sampler_dim; + struct vkd3d_shader_location first_sampler_dim_loc; + } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; +- /* Minimum number of binds required to include all object components actually used in the shader. +- * It may be less than the allocation size, e.g. for texture arrays. 
*/ +- unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1]; ++ /* Minimum number of binds required to include all components actually used in the shader. ++ * It may be less than the allocation size, e.g. for texture arrays. ++ * The bind_count for HLSL_REGSET_NUMERIC is only used in uniforms for now. */ ++ unsigned int bind_count[HLSL_REGSET_LAST + 1]; + + /* Whether the shader performs dereferences with non-constant offsets in the variable. */ + bool indexable; +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 79317bb0545..c6b6219eb4b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -168,6 +168,9 @@ static struct list *make_empty_list(struct hlsl_ctx *ctx) + + static void destroy_block(struct hlsl_block *block) + { ++ if (!block) ++ return; ++ + hlsl_block_cleanup(block); + vkd3d_free(block); + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 8882deaf6cd..26179042082 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -4248,34 +4248,67 @@ static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hls + return false; + } + +-static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++static void register_deref_usage(struct hlsl_ctx *ctx, struct hlsl_deref *deref) + { +- struct hlsl_ir_resource_load *load; +- struct hlsl_ir_var *var; +- enum hlsl_regset regset; ++ struct hlsl_ir_var *var = deref->var; ++ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); ++ uint32_t required_bind_count; ++ struct hlsl_type *type; + unsigned int index; + +- if (instr->type != HLSL_IR_RESOURCE_LOAD) +- return false; +- +- load = hlsl_ir_resource_load(instr); +- var = load->resource.var; ++ if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index)) ++ return; + +- regset = 
hlsl_deref_get_regset(ctx, &load->resource); ++ if (regset <= HLSL_REGSET_LAST_OBJECT) ++ { ++ var->objects_usage[regset][index].used = true; ++ var->bind_count[regset] = max(var->bind_count[regset], index + 1); ++ } ++ else if (regset == HLSL_REGSET_NUMERIC) ++ { ++ type = hlsl_deref_get_type(ctx, deref); + +- if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) +- return false; ++ hlsl_regset_index_from_deref(ctx, deref, regset, &index); ++ required_bind_count = align(index + type->reg_size[regset], 4) / 4; ++ var->bind_count[regset] = max(var->bind_count[regset], required_bind_count); ++ } ++ else ++ { ++ vkd3d_unreachable(); ++ } ++} + +- var->objects_usage[regset][index].used = true; +- var->bind_count[regset] = max(var->bind_count[regset], index + 1); +- if (load->sampler.var) ++static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ switch (instr->type) + { +- var = load->sampler.var; +- if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) +- return false; ++ case HLSL_IR_LOAD: ++ { ++ struct hlsl_ir_load *load = hlsl_ir_load(instr); ++ ++ if (!load->src.var->is_uniform) ++ return false; ++ ++ /* These will are handled by validate_static_object_references(). 
*/ ++ if (hlsl_deref_get_regset(ctx, &load->src) != HLSL_REGSET_NUMERIC) ++ return false; ++ ++ register_deref_usage(ctx, &load->src); ++ break; ++ } ++ ++ case HLSL_IR_RESOURCE_LOAD: ++ register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->resource); ++ if (hlsl_ir_resource_load(instr)->sampler.var) ++ register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->sampler); ++ break; + +- var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; +- var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); ++ case HLSL_IR_RESOURCE_STORE: ++ register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource); ++ break; ++ ++ default: ++ break; + } + + return false; +@@ -4520,16 +4553,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + } + } + ++static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort) ++{ ++ struct hlsl_ir_var *var; ++ ++ list_remove(&to_sort->extern_entry); ++ ++ LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) ++ { ++ uint32_t to_sort_size = to_sort->bind_count[HLSL_REGSET_NUMERIC]; ++ uint32_t var_size = var->bind_count[HLSL_REGSET_NUMERIC]; ++ ++ if (to_sort_size > var_size) ++ { ++ list_add_before(&var->extern_entry, &to_sort->extern_entry); ++ return; ++ } ++ } ++ ++ list_add_tail(sorted, &to_sort->extern_entry); ++} ++ ++static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) ++{ ++ struct list sorted = LIST_INIT(sorted); ++ struct hlsl_ir_var *var, *next; ++ ++ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_uniform) ++ sort_uniform_by_numeric_bind_count(&sorted, var); ++ } ++ list_move_tail(&ctx->extern_vars, &sorted); ++} ++ + static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { + struct register_allocator allocator = {0}; + struct hlsl_ir_var *var; + ++ sort_uniforms_by_numeric_bind_count(ctx); ++ + 
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + +- if (!var->is_uniform || !var->last_read || reg_size == 0) ++ if (!var->is_uniform || reg_size == 0) + continue; + + if (var->reg_reservation.reg_type == 'c') +@@ -4560,15 +4629,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC]; + +- if (!var->is_uniform || !var->last_read || reg_size == 0) ++ if (!var->is_uniform || alloc_size == 0) + continue; + + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + { +- var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, +- 1, UINT_MAX, var->data_type); ++ var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size); + TRACE("Allocated %s to %s.\n", var->name, + debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); + } +@@ -5102,14 +5170,15 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + return true; + } + ++/* Retrieves true if the index is constant, and false otherwise. In the latter case, the maximum ++ * possible index is retrieved, assuming there is not out-of-bounds access. 
*/ + bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index) + { + struct hlsl_type *type = deref->var->data_type; ++ bool index_is_constant = true; + unsigned int i; + +- assert(regset <= HLSL_REGSET_LAST_OBJECT); +- + *index = 0; + + for (i = 0; i < deref->path_len; ++i) +@@ -5118,37 +5187,62 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref + unsigned int idx = 0; + + assert(path_node); +- if (path_node->type != HLSL_IR_CONSTANT) +- return false; ++ if (path_node->type == HLSL_IR_CONSTANT) ++ { ++ /* We should always have generated a cast to UINT. */ ++ assert(path_node->data_type->class == HLSL_CLASS_SCALAR ++ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + +- /* We should always have generated a cast to UINT. */ +- assert(path_node->data_type->class == HLSL_CLASS_SCALAR +- && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); ++ idx = hlsl_ir_constant(path_node)->value.u[0].u; + +- idx = hlsl_ir_constant(path_node)->value.u[0].u; ++ switch (type->class) ++ { ++ case HLSL_CLASS_ARRAY: ++ if (idx >= type->e.array.elements_count) ++ return false; + +- switch (type->class) ++ *index += idx * type->e.array.type->reg_size[regset]; ++ break; ++ ++ case HLSL_CLASS_STRUCT: ++ *index += type->e.record.fields[idx].reg_offset[regset]; ++ break; ++ ++ case HLSL_CLASS_MATRIX: ++ *index += 4 * idx; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ } ++ else + { +- case HLSL_CLASS_ARRAY: +- if (idx >= type->e.array.elements_count) +- return false; ++ index_is_constant = false; + +- *index += idx * type->e.array.type->reg_size[regset]; +- break; ++ switch (type->class) ++ { ++ case HLSL_CLASS_ARRAY: ++ idx = type->e.array.elements_count - 1; ++ *index += idx * type->e.array.type->reg_size[regset]; ++ break; + +- case HLSL_CLASS_STRUCT: +- *index += type->e.record.fields[idx].reg_offset[regset]; +- break; ++ case HLSL_CLASS_MATRIX: ++ idx = 
hlsl_type_major_size(type) - 1; ++ *index += idx * 4; ++ break; + +- default: +- vkd3d_unreachable(); ++ default: ++ vkd3d_unreachable(); ++ } + } + + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } + +- assert(type->reg_size[regset] == 1); +- return true; ++ assert(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); ++ assert(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); ++ return index_is_constant; + } + + bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) +@@ -5440,7 +5534,12 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); + if (profile->major_version >= 4) + hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); +- hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); ++ ++ do ++ compute_liveness(ctx, entry_func); ++ while (hlsl_transform_ir(ctx, dce, body, NULL)); ++ ++ hlsl_transform_ir(ctx, track_components_usage, body, NULL); + sort_synthetic_separated_samplers_first(ctx); + + if (profile->major_version < 4) +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index c6ecbdd9e46..2c78447e382 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -5748,11 +5748,6 @@ static void vsir_validate_instruction(struct validation_context *ctx) + instruction->declaration.max_tessellation_factor); + return; + +- /* The DXIL parser can generate these outside phases, but this is not an issue. 
*/ +- case VKD3DSIH_DCL_INPUT: +- case VKD3DSIH_DCL_OUTPUT: +- return; +- + case VKD3DSIH_DCL_INPUT_PRIMITIVE: + if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED + || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) +@@ -5810,7 +5805,9 @@ static void vsir_validate_instruction(struct validation_context *ctx) + break; + } + +- if (version->type == VKD3D_SHADER_TYPE_HULL && ctx->phase == VKD3DSIH_INVALID) ++ /* Only DCL instructions may occur outside hull shader phases. */ ++ if (!vsir_instruction_is_dcl(instruction) && version->type == VKD3D_SHADER_TYPE_HULL ++ && ctx->phase == VKD3DSIH_INVALID) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, + "Instruction %#x appear before any phase instruction in a hull shader.", + instruction->handler_idx); +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 813e20fdcd7..4ee8e6bba4c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -1760,6 +1760,45 @@ static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_ + result_type, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); + } + ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot_bit_count(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, SpvGroupOperation group_op, uint32_t val_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); ++ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformBallotBitCount, ++ result_type, vkd3d_spirv_get_op_scope_subgroup(builder), group_op, val_id); ++} ++ ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_elect(struct vkd3d_spirv_builder *builder) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniform); ++ return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpGroupNonUniformElect, ++ vkd3d_spirv_get_op_type_bool(builder), 
vkd3d_spirv_get_op_scope_subgroup(builder)); ++} ++ ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_broadcast(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t val_id, uint32_t lane_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); ++ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformBroadcast, result_type, ++ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, lane_id); ++} ++ ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_shuffle(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t val_id, uint32_t lane_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformShuffle); ++ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformShuffle, result_type, ++ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, lane_id); ++} ++ ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_broadcast_first(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t val_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformBallot); ++ return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpGroupNonUniformBroadcastFirst, ++ result_type, vkd3d_spirv_get_op_scope_subgroup(builder), val_id); ++} ++ + static uint32_t vkd3d_spirv_build_op_glsl_std450_tr1(struct vkd3d_spirv_builder *builder, + enum GLSLstd450 op, uint32_t result_type, uint32_t operand) + { +@@ -3602,8 +3641,9 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp + indexes[index_count++] = spirv_compiler_emit_register_addressing(compiler, &reg->idx[0]); + } + ++ /* Alignment is supported only in the Kernel execution model and is an optimisation only. 
*/ + if (reg->alignment) +- WARN("Ignoring alignment %u.\n", reg->alignment); ++ TRACE("Ignoring alignment %u.\n", reg->alignment); + + if (index_count) + { +@@ -5827,7 +5867,7 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler + + if (flags & ~(VKD3DSGF_REFACTORING_ALLOWED | VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS)) + FIXME("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); +- else ++ else if (flags) + WARN("Unhandled global flags %#"PRIx64".\n", (uint64_t)flags); + } + +@@ -5889,8 +5929,9 @@ static void spirv_compiler_emit_dcl_indexable_temp(struct spirv_compiler *compil + vsir_register_init(&reg, VKD3DSPR_IDXTEMP, VKD3D_DATA_FLOAT, 1); + reg.idx[0].offset = temp->register_idx; + ++ /* Alignment is supported only in the Kernel execution model and is an optimisation only. */ + if (temp->alignment) +- WARN("Ignoring alignment %u.\n", temp->alignment); ++ TRACE("Ignoring alignment %u.\n", temp->alignment); + + function_location = spirv_compiler_get_current_function_location(compiler); + vkd3d_spirv_begin_function_stream_insertion(builder, function_location); +@@ -9801,18 +9842,26 @@ static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } + +-static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) ++static uint32_t spirv_compiler_emit_group_nonuniform_ballot(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_src_param *src) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_dst_param *dst = instruction->dst; +- const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, val_id; + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, VKD3D_VEC4_SIZE); + val_id = spirv_compiler_emit_load_src(compiler, src, VKD3DSP_WRITEMASK_0); + val_id = 
vkd3d_spirv_build_op_group_nonuniform_ballot(builder, type_id, val_id); + ++ return val_id; ++} ++ ++static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ uint32_t val_id; ++ ++ val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } + +@@ -9871,6 +9920,79 @@ static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } + ++static void spirv_compiler_emit_wave_bit_count(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ SpvGroupOperation group_op; ++ uint32_t type_id, val_id; ++ ++ group_op = (instruction->handler_idx == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? 
SpvGroupOperationExclusiveScan ++ : SpvGroupOperationReduce; ++ ++ val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); ++ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); ++ val_id = vkd3d_spirv_build_op_group_nonuniform_ballot_bit_count(builder, type_id, group_op, val_id); ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static void spirv_compiler_emit_wave_is_first_lane(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ uint32_t val_id; ++ ++ val_id = vkd3d_spirv_build_op_group_nonuniform_elect(builder); ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static void spirv_compiler_emit_wave_read_lane_at(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, lane_id, val_id; ++ ++ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, ++ vsir_write_mask_component_count(dst->write_mask)); ++ val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); ++ lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); ++ ++ /* TODO: detect values loaded from a const buffer? */ ++ if (register_is_constant_or_undef(&src[1].reg)) ++ { ++ /* Uniform lane_id only. */ ++ val_id = vkd3d_spirv_build_op_group_nonuniform_broadcast(builder, type_id, val_id, lane_id); ++ } ++ else ++ { ++ /* WaveReadLaneAt supports non-uniform lane ids, so if lane_id is not constant it may not be uniform. 
*/ ++ val_id = vkd3d_spirv_build_op_group_nonuniform_shuffle(builder, type_id, val_id, lane_id); ++ } ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static void spirv_compiler_emit_wave_read_lane_first(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, val_id; ++ ++ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, ++ vsir_write_mask_component_count(dst->write_mask)); ++ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); ++ val_id = vkd3d_spirv_build_op_group_nonuniform_broadcast_first(builder, type_id, val_id); ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ + /* This function is called after declarations are processed. */ + static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) + { +@@ -10236,6 +10358,19 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_WAVE_OP_UMIN: + spirv_compiler_emit_wave_alu_op(compiler, instruction); + break; ++ case VKD3DSIH_WAVE_ALL_BIT_COUNT: ++ case VKD3DSIH_WAVE_PREFIX_BIT_COUNT: ++ spirv_compiler_emit_wave_bit_count(compiler, instruction); ++ break; ++ case VKD3DSIH_WAVE_IS_FIRST_LANE: ++ spirv_compiler_emit_wave_is_first_lane(compiler, instruction); ++ break; ++ case VKD3DSIH_WAVE_READ_LANE_AT: ++ spirv_compiler_emit_wave_read_lane_at(compiler, instruction); ++ break; ++ case VKD3DSIH_WAVE_READ_LANE_FIRST: ++ spirv_compiler_emit_wave_read_lane_first(compiler, instruction); ++ break; + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index 
29b2c1482a9..c15dae52c50 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -2004,7 +2004,7 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, + { + void *params; + +- if (count > allocator->count - allocator->index) ++ if (!allocator->current || count > allocator->count - allocator->index) + { + struct vkd3d_shader_param_node *next; + +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index bf9d3038f08..742189cefbb 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -533,8 +533,10 @@ enum vkd3d_shader_opcode + VKD3DSIH_WAVE_ACTIVE_BIT_AND, + VKD3DSIH_WAVE_ACTIVE_BIT_OR, + VKD3DSIH_WAVE_ACTIVE_BIT_XOR, ++ VKD3DSIH_WAVE_ALL_BIT_COUNT, + VKD3DSIH_WAVE_ALL_TRUE, + VKD3DSIH_WAVE_ANY_TRUE, ++ VKD3DSIH_WAVE_IS_FIRST_LANE, + VKD3DSIH_WAVE_OP_ADD, + VKD3DSIH_WAVE_OP_IMAX, + VKD3DSIH_WAVE_OP_IMIN, +@@ -543,6 +545,9 @@ enum vkd3d_shader_opcode + VKD3DSIH_WAVE_OP_MUL, + VKD3DSIH_WAVE_OP_UMAX, + VKD3DSIH_WAVE_OP_UMIN, ++ VKD3DSIH_WAVE_PREFIX_BIT_COUNT, ++ VKD3DSIH_WAVE_READ_LANE_AT, ++ VKD3DSIH_WAVE_READ_LANE_FIRST, + VKD3DSIH_XOR, + + VKD3DSIH_INVALID, +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index 36d8433939a..8b817264e63 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -3487,12 +3487,20 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 + return E_INVALIDARG; + } + ++ if (data->HighestShaderModel != D3D_SHADER_MODEL_5_1 ++ && (data->HighestShaderModel < D3D_SHADER_MODEL_6_0 ++ || data->HighestShaderModel > D3D_HIGHEST_SHADER_MODEL)) ++ { ++ WARN("Unknown shader model %#x.\n", data->HighestShaderModel); ++ return E_INVALIDARG; ++ } ++ + TRACE("Request shader model %#x.\n", data->HighestShaderModel); + + #ifdef 
VKD3D_SHADER_UNSUPPORTED_DXIL +- data->HighestShaderModel = D3D_SHADER_MODEL_6_0; ++ data->HighestShaderModel = min(data->HighestShaderModel, D3D_SHADER_MODEL_6_0); + #else +- data->HighestShaderModel = D3D_SHADER_MODEL_5_1; ++ data->HighestShaderModel = min(data->HighestShaderModel, D3D_SHADER_MODEL_5_1); + #endif + + TRACE("Shader model %#x.\n", data->HighestShaderModel); +@@ -3911,6 +3919,91 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 + return S_OK; + } + ++ case D3D12_FEATURE_D3D12_OPTIONS14: ++ { ++ D3D12_FEATURE_DATA_D3D12_OPTIONS14 *data = feature_data; ++ ++ if (feature_data_size != sizeof(*data)) ++ { ++ WARN("Invalid size %u.\n", feature_data_size); ++ } ++ ++ data->AdvancedTextureOpsSupported = FALSE; ++ data->WriteableMSAATexturesSupported = FALSE; ++ data->IndependentFrontAndBackStencilRefMaskSupported = FALSE; ++ ++ TRACE("Advanced texture ops %#x.\n", data->AdvancedTextureOpsSupported); ++ TRACE("Writeable MSAA textures %#x.\n", data->WriteableMSAATexturesSupported); ++ TRACE("Independent front and back stencil ref mask %#x.\n", data->IndependentFrontAndBackStencilRefMaskSupported); ++ return S_OK; ++ } ++ ++ case D3D12_FEATURE_D3D12_OPTIONS15: ++ { ++ D3D12_FEATURE_DATA_D3D12_OPTIONS15 *data = feature_data; ++ ++ if (feature_data_size != sizeof(*data)) ++ { ++ WARN("Invalid size %u.\n", feature_data_size); ++ } ++ ++ data->TriangleFanSupported = FALSE; ++ data->DynamicIndexBufferStripCutSupported = FALSE; ++ ++ TRACE("Triangle fan %#x.\n", data->TriangleFanSupported); ++ TRACE("Dynamic index buffer strip cut %#x.\n", data->DynamicIndexBufferStripCutSupported); ++ return S_OK; ++ } ++ ++ case D3D12_FEATURE_D3D12_OPTIONS16: ++ { ++ D3D12_FEATURE_DATA_D3D12_OPTIONS16 *data = feature_data; ++ ++ if (feature_data_size != sizeof(*data)) ++ { ++ WARN("Invalid size %u.\n", feature_data_size); ++ } ++ ++ data->DynamicDepthBiasSupported = FALSE; ++ data->GPUUploadHeapSupported = FALSE; ++ ++ TRACE("Dynamic depth bias 
%#x.\n", data->DynamicDepthBiasSupported); ++ TRACE("GPU upload heap %#x.\n", data->GPUUploadHeapSupported); ++ return S_OK; ++ } ++ ++ case D3D12_FEATURE_D3D12_OPTIONS17: ++ { ++ D3D12_FEATURE_DATA_D3D12_OPTIONS17 *data = feature_data; ++ ++ if (feature_data_size != sizeof(*data)) ++ { ++ WARN("Invalid size %u.\n", feature_data_size); ++ } ++ ++ data->NonNormalizedCoordinateSamplersSupported = FALSE; ++ data->ManualWriteTrackingResourceSupported = FALSE; ++ ++ TRACE("Non-normalized coordinate samplers %#x.\n", data->NonNormalizedCoordinateSamplersSupported); ++ TRACE("Manual write tracking resource %#x.\n", data->ManualWriteTrackingResourceSupported); ++ return S_OK; ++ } ++ ++ case D3D12_FEATURE_D3D12_OPTIONS18: ++ { ++ D3D12_FEATURE_DATA_D3D12_OPTIONS18 *data = feature_data; ++ ++ if (feature_data_size != sizeof(*data)) ++ { ++ WARN("Invalid size %u.\n", feature_data_size); ++ } ++ ++ data->RenderPassesValid = FALSE; ++ ++ TRACE("Render passes valid %#x.\n", data->RenderPassesValid); ++ return S_OK; ++ } ++ + default: + FIXME("Unhandled feature %#x.\n", feature); + return E_NOTIMPL; +-- +2.43.0 + diff --git a/patches/vkd3d-latest/0008-Updated-vkd3d-to-061dc390367b4c83022d5fe1255f8d38f6b.patch b/patches/vkd3d-latest/0008-Updated-vkd3d-to-061dc390367b4c83022d5fe1255f8d38f6b.patch new file mode 100644 index 00000000..73845b28 --- /dev/null +++ b/patches/vkd3d-latest/0008-Updated-vkd3d-to-061dc390367b4c83022d5fe1255f8d38f6b.patch @@ -0,0 +1,153 @@ +From cda6dd1902e0113ad3730c1f696138b668bbfacb Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Thu, 16 May 2024 11:56:37 +1000 +Subject: [PATCH] Updated vkd3d to 061dc390367b4c83022d5fe1255f8d38f6b7ce9c. 
+ +--- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 1 + + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 13 ++-- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 69 ++++++++++++--------- + 3 files changed, 47 insertions(+), 36 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 08a017874ae..27814f3a56f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -1250,6 +1250,7 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const + + void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); + ++void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); + int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index c6b6219eb4b..9c1bdef926d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -1293,7 +1293,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str + struct hlsl_ir_node *node; + struct hlsl_block expr; + unsigned int ret = 0; +- bool progress; ++ struct hlsl_src src; + + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { +@@ -1330,13 +1330,12 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str + return 0; + } + +- do +- { +- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); +- progress |= hlsl_copy_propagation_execute(ctx, &expr); +- } while (progress); ++ /* Wrap the node into a src to allow the reference to survive the multiple const passes. 
*/ ++ hlsl_src_from_node(&src, node_from_block(&expr)); ++ hlsl_run_const_passes(ctx, &expr); ++ node = src.node; ++ hlsl_src_remove(&src); + +- node = node_from_block(&expr); + if (node->type == HLSL_IR_CONSTANT) + { + constant = hlsl_ir_constant(node); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 26179042082..27f16af51c5 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -5408,6 +5408,44 @@ static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *bod + } + } + ++void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) ++{ ++ bool progress; ++ ++ lower_ir(ctx, lower_matrix_swizzles, body); ++ lower_ir(ctx, lower_index_loads, body); ++ ++ lower_ir(ctx, lower_broadcasts, body); ++ while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); ++ do ++ { ++ progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); ++ progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); ++ } ++ while (progress); ++ hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); ++ ++ lower_ir(ctx, lower_narrowing_casts, body); ++ lower_ir(ctx, lower_int_dot, body); ++ lower_ir(ctx, lower_int_division, body); ++ lower_ir(ctx, lower_int_modulus, body); ++ lower_ir(ctx, lower_int_abs, body); ++ lower_ir(ctx, lower_casts_to_bool, body); ++ lower_ir(ctx, lower_float_modulus, body); ++ hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); ++ ++ do ++ { ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); ++ progress |= hlsl_copy_propagation_execute(ctx, body); ++ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); ++ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); ++ progress |= 
hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); ++ } while (progress); ++} ++ + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) + { +@@ -5416,7 +5454,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + struct recursive_call_ctx recursive_call_ctx; + struct hlsl_ir_var *var; + unsigned int i; +- bool progress; + + list_move_head(&body->instrs, &ctx->static_initializers.instrs); + +@@ -5494,35 +5531,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + { + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); + } +- lower_ir(ctx, lower_broadcasts, body); +- while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); +- do +- { +- progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); +- progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); +- } +- while (progress); +- hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); + +- lower_ir(ctx, lower_narrowing_casts, body); +- lower_ir(ctx, lower_int_dot, body); +- lower_ir(ctx, lower_int_division, body); +- lower_ir(ctx, lower_int_modulus, body); +- lower_ir(ctx, lower_int_abs, body); +- lower_ir(ctx, lower_casts_to_bool, body); +- lower_ir(ctx, lower_float_modulus, body); +- hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); +- do +- { +- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); +- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); +- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); +- progress |= hlsl_copy_propagation_execute(ctx, body); +- progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); +- progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); +- progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); +- } +- while 
(progress); ++ hlsl_run_const_passes(ctx, body); ++ + remove_unreachable_code(ctx, body); + hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL); + +-- +2.43.0 +