diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-a0d52dc38508b76efedcb6fa1df3162a006.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-a0d52dc38508b76efedcb6fa1df3162a006.patch index 3d7cf179..edb68113 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-a0d52dc38508b76efedcb6fa1df3162a006.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-a0d52dc38508b76efedcb6fa1df3162a006.patch @@ -1,4 +1,4 @@ -From 0b7474f224beea3cdb08e7235f45e94aacf70722 Mon Sep 17 00:00:00 2001 +From 83e8be66c4e2654b5c9f6c99d511be381a5de284 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 7 Mar 2024 10:40:41 +1100 Subject: [PATCH] Updated vkd3d to a0d52dc38508b76efedcb6fa1df3162a0062ceaf. diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-23259263cf662fb203a173b30b90f44cfbb.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-23259263cf662fb203a173b30b90f44cfbb.patch index c90e4dd1..8adb0334 100644 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-23259263cf662fb203a173b30b90f44cfbb.patch +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-23259263cf662fb203a173b30b90f44cfbb.patch @@ -1,4 +1,4 @@ -From 01e9d461f76202f7994a9f03ce6443bd6bbc9440 Mon Sep 17 00:00:00 2001 +From 4f30265014c3d99c2cf40236d2570010680b4740 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Sat, 9 Mar 2024 10:36:15 +1100 Subject: [PATCH] Updated vkd3d to 23259263cf662fb203a173b30b90f44cfbb9d29e. diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-9b0d304f8fe4e6f0d065e4561af9e372e16.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-9b0d304f8fe4e6f0d065e4561af9e372e16.patch new file mode 100644 index 00000000..78ff8bbf --- /dev/null +++ b/patches/vkd3d-latest/0003-Updated-vkd3d-to-9b0d304f8fe4e6f0d065e4561af9e372e16.patch @@ -0,0 +1,2215 @@ +From f27fc961454a26d013f598ea3d476ed50faee523 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Wed, 13 Mar 2024 15:58:53 +1100 +Subject: [PATCH] Updated vkd3d to 9b0d304f8fe4e6f0d065e4561af9e372e1643c2d. 
+ +--- + libs/vkd3d/include/vkd3d_shader.h | 28 +- + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 22 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 16 +- + libs/vkd3d/libs/vkd3d-shader/dxbc.c | 39 +-- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 145 +++++++++-- + libs/vkd3d/libs/vkd3d-shader/fx.c | 128 +++++++--- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 49 ++-- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 4 + + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 2 + + libs/vkd3d/libs/vkd3d-shader/hlsl.l | 1 - + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 24 +- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 8 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 239 ++++++++++++++++-- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 166 ++++++++++-- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 45 ++-- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 22 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 35 ++- + libs/vkd3d/libs/vkd3d/device.c | 29 ++- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 3 +- + 19 files changed, 781 insertions(+), 224 deletions(-) + +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index 1a62f093d6b..83b90474af4 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h ++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -218,6 +218,20 @@ enum vkd3d_shader_compile_option_feature_flags + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLAGS), + }; + ++/** ++ * Flags for vkd3d_shader_parse_dxbc(). ++ * ++ * \since 1.12 ++ */ ++enum vkd3d_shader_parse_dxbc_flags ++{ ++ /** Ignore the checksum and continue parsing even if it is ++ * incorrect. 
*/ ++ VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM = 0x00000001, ++ ++ VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARSE_DXBC_FLAGS), ++}; ++ + enum vkd3d_shader_compile_option_name + { + /** +@@ -285,6 +299,15 @@ enum vkd3d_shader_compile_option_name + * \since 1.11 + */ + VKD3D_SHADER_COMPILE_OPTION_FEATURE = 0x0000000a, ++ /** ++ * If \a value is non-zero compilation will produce a child effect using ++ * shared object descriptions, as instructed by the "shared" modifier. ++ * Child effects are supported with fx_2_0, fx_4_0, and fx_4_1. This option ++ * and "shared" modifiers are ignored for fx_5_0 profile, and non-fx profiles. ++ * ++ * \since 1.12 ++ */ ++ VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT = 0x0000000b, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), + }; +@@ -2385,9 +2408,8 @@ VKD3D_SHADER_API void vkd3d_shader_free_dxbc(struct vkd3d_shader_dxbc_desc *dxbc + * + * \param dxbc A vkd3d_shader_code structure containing the DXBC blob to parse. + * +- * \param flags A set of flags modifying the behaviour of the function. No +- * flags are defined for this version of vkd3d-shader, and this parameter +- * should be set to 0. ++ * \param flags A combination of zero or more elements of enum ++ * vkd3d_shader_parse_dxbc_flags. + * + * \param desc A vkd3d_shader_dxbc_desc structure describing the contents of + * the DXBC blob. 
Its vkd3d_shader_dxbc_section_desc structures will contain +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index b8dcfd011b6..fcbb321edd1 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -649,6 +649,8 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum + [VKD3D_DATA_UINT8 ] = "uint8", + [VKD3D_DATA_UINT64 ] = "uint64", + [VKD3D_DATA_BOOL ] = "bool", ++ [VKD3D_DATA_UINT16 ] = "uint16", ++ [VKD3D_DATA_HALF ] = "half", + }; + + const char *name; +@@ -2196,22 +2198,22 @@ static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, + } + + static enum vkd3d_result dump_signatures(struct vkd3d_d3d_asm_compiler *compiler, +- const struct vkd3d_shader_desc *shader_desc, enum vkd3d_shader_type shader_type) ++ const struct vsir_program *program) + { + enum vkd3d_result ret; + + if ((ret = dump_signature(compiler, ".input", +- shader_type == VKD3D_SHADER_TYPE_DOMAIN ? "vicp" : "v", +- &shader_desc->input_signature)) < 0) ++ program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vicp" : "v", ++ &program->input_signature)) < 0) + return ret; + + if ((ret = dump_signature(compiler, ".output", "o", +- &shader_desc->output_signature)) < 0) ++ &program->output_signature)) < 0) + return ret; + + if ((ret = dump_signature(compiler, ".patch_constant", +- shader_type == VKD3D_SHADER_TYPE_DOMAIN ? "vpc" : "o", +- &shader_desc->patch_constant_signature)) < 0) ++ program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? 
"vpc" : "o", ++ &program->patch_constant_signature)) < 0) + return ret; + + vkd3d_string_buffer_printf(&compiler->buffer, "%s.text%s\n", +@@ -2220,8 +2222,8 @@ static enum vkd3d_result dump_signatures(struct vkd3d_d3d_asm_compiler *compiler + return VKD3D_OK; + } + +-enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, +- const struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, ++enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, ++ const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, enum vsir_asm_flags flags) + { + const struct vkd3d_shader_version *shader_version = &program->shader_version; +@@ -2297,7 +2299,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, + * doesn't even have an explicit concept of signature. */ + if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES && shader_version->major >= 4) + { +- if ((result = dump_signatures(&compiler, shader_desc, shader_version->type)) < 0) ++ if ((result = dump_signatures(&compiler, program)) < 0) + { + vkd3d_string_buffer_cleanup(buffer); + return result; +@@ -2355,7 +2357,7 @@ void vkd3d_shader_trace(const struct vsir_program *program) + const char *p, *q, *end; + struct vkd3d_shader_code code; + +- if (vkd3d_dxbc_binary_to_text(program, NULL, NULL, &code, VSIR_ASM_FLAG_DUMP_TYPES) != VKD3D_OK) ++ if (d3d_asm_compile(program, NULL, &code, VSIR_ASM_FLAG_DUMP_TYPES) != VKD3D_OK) + return; + + end = (const char *)code.code + code.size; +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index a8cca17faa3..57dd0258aef 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -541,9 +541,9 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp + struct signature_element *element; + + if (output) +- signature = 
&sm1->p.shader_desc.output_signature; ++ signature = &sm1->p.program.output_signature; + else +- signature = &sm1->p.shader_desc.input_signature; ++ signature = &sm1->p.program.input_signature; + + if ((element = find_signature_element(signature, name, index))) + { +@@ -581,9 +581,9 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, + struct signature_element *element; + + if (output) +- signature = &sm1->p.shader_desc.output_signature; ++ signature = &sm1->p.program.output_signature; + else +- signature = &sm1->p.shader_desc.input_signature; ++ signature = &sm1->p.program.input_signature; + + if (!(element = find_signature_element_by_register_index(signature, register_index))) + { +@@ -886,7 +886,6 @@ static void shader_sm1_destroy(struct vkd3d_shader_parser *parser) + struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); + + vsir_program_cleanup(&parser->program); +- free_shader_desc(&sm1->p.shader_desc); + vkd3d_free(sm1); + } + +@@ -1237,7 +1236,6 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; + const uint32_t *code = compile_info->source.code; + size_t code_size = compile_info->source.size; +- struct vkd3d_shader_desc *shader_desc; + struct vkd3d_shader_version version; + uint16_t shader_type; + size_t token_count; +@@ -1290,9 +1288,6 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, + if (!vkd3d_shader_parser_init(&sm1->p, message_context, compile_info->source_name, &version, &shader_sm1_parser_ops, + code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- shader_desc = &sm1->p.shader_desc; +- shader_desc->byte_code = code; +- shader_desc->byte_code_size = code_size; + sm1->ptr = sm1->start; + + return VKD3D_OK; +@@ -2067,6 +2062,9 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + D3DDECLUSAGE usage; + bool ret; + ++ if ((!output && !var->last_read) || (output && !var->first_write)) ++ return; ++ + if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) + { + usage = 0; +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +index 37ebc73c099..8a1012d909b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +@@ -150,7 +150,7 @@ static const char *shader_get_string(const char *data, size_t data_size, size_t + } + + static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, +- const char *source_name, struct vkd3d_shader_dxbc_desc *desc) ++ const char *source_name, uint32_t flags, struct vkd3d_shader_dxbc_desc *desc) + { + const struct vkd3d_shader_location location = {.source_name = source_name}; + struct vkd3d_shader_dxbc_section_desc *sections, *section; +@@ -186,17 +186,20 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ + checksum[1] = read_u32(&ptr); + checksum[2] = read_u32(&ptr); + checksum[3] = read_u32(&ptr); +- vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); +- if (memcmp(checksum, calculated_checksum, sizeof(checksum))) +- { +- WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " +- "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", +- checksum[0], checksum[1], checksum[2], checksum[3], +- calculated_checksum[0], calculated_checksum[1], +- calculated_checksum[2], calculated_checksum[3]); +- vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, +- 
"Invalid DXBC checksum."); +- return VKD3D_ERROR_INVALID_ARGUMENT; ++ if (!(flags & VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM)) ++ { ++ vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); ++ if (memcmp(checksum, calculated_checksum, sizeof(checksum))) ++ { ++ WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " ++ "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", ++ checksum[0], checksum[1], checksum[2], checksum[3], ++ calculated_checksum[0], calculated_checksum[1], ++ calculated_checksum[2], calculated_checksum[3]); ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, ++ "Invalid DXBC checksum."); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } + } + + version = read_u32(&ptr); +@@ -287,7 +290,7 @@ static int for_each_dxbc_section(const struct vkd3d_shader_code *dxbc, + unsigned int i; + int ret; + +- if ((ret = parse_dxbc(dxbc, message_context, source_name, &desc)) < 0) ++ if ((ret = parse_dxbc(dxbc, message_context, source_name, 0, &desc)) < 0) + return ret; + + for (i = 0; i < desc.section_count; ++i) +@@ -313,7 +316,7 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, + *messages = NULL; + vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); + +- ret = parse_dxbc(dxbc, &message_context, NULL, desc); ++ ret = parse_dxbc(dxbc, &message_context, NULL, flags, desc); + + vkd3d_shader_message_context_trace_messages(&message_context); + if (!vkd3d_shader_message_context_copy_messages(&message_context, messages) && ret >= 0) +@@ -485,7 +488,7 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, + static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, + struct vkd3d_shader_message_context *message_context, void *context) + { +- struct vkd3d_shader_desc *desc = context; ++ struct dxbc_shader_desc *desc = context; + int ret; + + switch (section->tag) +@@ -550,7 +553,7 @@ static int shdr_handler(const struct 
vkd3d_shader_dxbc_section_desc *section, + return VKD3D_OK; + } + +-void free_shader_desc(struct vkd3d_shader_desc *desc) ++void free_dxbc_shader_desc(struct dxbc_shader_desc *desc) + { + shader_signature_cleanup(&desc->input_signature); + shader_signature_cleanup(&desc->output_signature); +@@ -558,7 +561,7 @@ void free_shader_desc(struct vkd3d_shader_desc *desc) + } + + int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, +- struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) ++ struct vkd3d_shader_message_context *message_context, const char *source_name, struct dxbc_shader_desc *desc) + { + int ret; + +@@ -569,7 +572,7 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, + if (ret < 0) + { + WARN("Failed to parse shader, vkd3d result %d.\n", ret); +- free_shader_desc(desc); ++ free_dxbc_shader_desc(desc); + } + + return ret; +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index a202d208f9d..de51588b513 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -389,6 +389,8 @@ enum dx_intrinsic_opcode + DX_BUFFER_LOAD = 68, + DX_BUFFER_STORE = 69, + DX_GET_DIMENSIONS = 72, ++ DX_TEXTURE_GATHER = 73, ++ DX_TEXTURE_GATHER_CMP = 74, + DX_ATOMIC_BINOP = 78, + DX_ATOMIC_CMP_XCHG = 79, + DX_DERIV_COARSEX = 83, +@@ -2110,6 +2112,15 @@ static inline bool sm6_value_is_undef(const struct sm6_value *value) + return sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF; + } + ++static bool sm6_value_vector_is_constant_or_undef(const struct sm6_value **values, unsigned int count) ++{ ++ unsigned int i; ++ for (i = 0; i < count; ++i) ++ if (!sm6_value_is_constant(values[i]) && !sm6_value_is_undef(values[i])) ++ return false; ++ return true; ++} ++ + static bool sm6_value_is_icb(const struct sm6_value *value) + { + return value->value_type == VALUE_TYPE_ICB; +@@ -2199,6 +2210,8 @@ static enum 
vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type + return VKD3D_DATA_BOOL; + case 8: + return VKD3D_DATA_UINT8; ++ case 16: ++ return VKD3D_DATA_UINT16; + case 32: + return VKD3D_DATA_UINT; + case 64: +@@ -2212,6 +2225,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type + { + switch (type->u.width) + { ++ case 16: ++ return VKD3D_DATA_HALF; + case 32: + return VKD3D_DATA_FLOAT; + case 64: +@@ -2876,7 +2891,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const + } + + if (type->u.width == 16) +- FIXME("Half float type is not supported yet.\n"); ++ dst->u.reg.u.immconst_u32[0] = record->operands[0]; + else if (type->u.width == 32) + dst->u.reg.u.immconst_f32[0] = bitcast_uint64_to_float(record->operands[0]); + else if (type->u.width == 64) +@@ -4182,7 +4197,7 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + +- signature = &sm6->p.shader_desc.input_signature; ++ signature = &sm6->p.program.input_signature; + if (row_index >= signature->element_count) + { + WARN("Invalid row index %u.\n", row_index); +@@ -4572,7 +4587,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr + row_index = sm6_value_get_constant_uint(operands[0]); + column_index = sm6_value_get_constant_uint(operands[2]); + +- signature = &sm6->p.shader_desc.output_signature; ++ signature = &sm6->p.program.output_signature; + if (row_index >= signature->element_count) + { + WARN("Invalid row index %u.\n", row_index); +@@ -4612,6 +4627,68 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr + src_param_init_from_value(src_param, value); + } + ++static void sm6_parser_emit_dx_texture_gather(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_register coord, 
offset; ++ const struct sm6_value *resource, *sampler; ++ struct vkd3d_shader_src_param *src_params; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int swizzle; ++ bool extended_offset; ++ ++ resource = operands[0]; ++ sampler = operands[1]; ++ if (!sm6_value_validate_is_texture_handle(resource, op, sm6) ++ || !sm6_value_validate_is_sampler_handle(sampler, op, sm6)) ++ { ++ return; ++ } ++ ++ if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], VKD3D_VEC4_SIZE, NULL, state, &coord)) ++ return; ++ ++ if ((extended_offset = !sm6_value_vector_is_constant_or_undef(&operands[6], 2)) ++ && !sm6_parser_emit_coordinate_construct(sm6, &operands[6], 2, NULL, state, &offset)) ++ { ++ return; ++ } ++ ++ ins = state->ins; ++ if (op == DX_TEXTURE_GATHER) ++ { ++ instruction_init_with_resource(ins, extended_offset ? VKD3DSIH_GATHER4_PO : VKD3DSIH_GATHER4, resource, sm6); ++ if (!(src_params = instruction_src_params_alloc(ins, 3 + extended_offset, sm6))) ++ return; ++ } ++ else ++ { ++ instruction_init_with_resource(ins, extended_offset ? VKD3DSIH_GATHER4_PO_C : VKD3DSIH_GATHER4_C, resource, sm6); ++ if (!(src_params = instruction_src_params_alloc(ins, 4 + extended_offset, sm6))) ++ return; ++ src_param_init_from_value(&src_params[3 + extended_offset], operands[9]); ++ } ++ ++ src_param_init_vector_from_reg(&src_params[0], &coord); ++ if (extended_offset) ++ src_param_init_vector_from_reg(&src_params[1], &offset); ++ else ++ instruction_set_texel_offset(ins, &operands[6], sm6); ++ src_param_init_vector_from_reg(&src_params[1 + extended_offset], &resource->u.handle.reg); ++ src_param_init_vector_from_reg(&src_params[2 + extended_offset], &sampler->u.handle.reg); ++ /* Swizzle stored in the sampler parameter is the scalar component index to be gathered. 
*/ ++ swizzle = sm6_value_get_constant_uint(operands[8]); ++ if (swizzle >= VKD3D_VEC4_SIZE) ++ { ++ WARN("Invalid swizzle %#x.\n", swizzle); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Swizzle %#x for a texture gather operation is invalid.", swizzle); ++ } ++ src_params[2 + extended_offset].swizzle = swizzle; ++ ++ instruction_dst_param_init_ssa_vector(ins, VKD3D_VEC4_SIZE, sm6); ++} ++ + static void sm6_parser_emit_dx_texture_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -4791,6 +4868,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_SQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_STORE_OUTPUT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, + [DX_TAN ] = {"g", "R", sm6_parser_emit_dx_unary}, ++ [DX_TEXTURE_GATHER ] = {"o", "HHffffiic", sm6_parser_emit_dx_texture_gather}, ++ [DX_TEXTURE_GATHER_CMP ] = {"o", "HHffffiicf", sm6_parser_emit_dx_texture_gather}, + [DX_TEXTURE_LOAD ] = {"o", "HiiiiCCC", sm6_parser_emit_dx_texture_load}, + [DX_TEXTURE_STORE ] = {"v", "Hiiiooooc", sm6_parser_emit_dx_texture_store}, + [DX_UBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, +@@ -5055,7 +5134,10 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ + break; + case CAST_ZEXT: + case CAST_SEXT: +- /* nop or min precision. TODO: native 16-bit */ ++ /* nop or min precision. TODO: native 16-bit. ++ * Extension instructions could be emitted for min precision, but in Windows ++ * the AMD RX 580 simply drops such instructions, which makes sense as no ++ * assumptions should be made about any behaviour which depends on bit width. 
*/ + if (to->u.width == from->u.width || (to->u.width == 32 && from->u.width == 16)) + { + op = VKD3DSIH_NOP; +@@ -7752,19 +7834,19 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons + } + + if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], +- &sm6->p.shader_desc.input_signature)) < 0) ++ &sm6->p.program.input_signature)) < 0) + { + return ret; + } + if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], +- &sm6->p.shader_desc.output_signature)) < 0) ++ &sm6->p.program.output_signature)) < 0) + { + return ret; + } + /* TODO: patch constant signature in operand 2. */ + +- sm6_parser_init_input_signature(sm6, &sm6->p.shader_desc.input_signature); +- sm6_parser_init_output_signature(sm6, &sm6->p.shader_desc.output_signature); ++ sm6_parser_init_input_signature(sm6, &sm6->p.program.input_signature); ++ sm6_parser_init_output_signature(sm6, &sm6->p.program.output_signature); + + return VKD3D_OK; + } +@@ -8062,7 +8144,6 @@ static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) + sm6_parser_metadata_cleanup(sm6); + vkd3d_free(sm6->descriptors); + vkd3d_free(sm6->values); +- free_shader_desc(&parser->shader_desc); + vkd3d_free(sm6); + } + +@@ -8080,15 +8161,16 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 + return NULL; + } + +-static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, +- const char *source_name, struct vkd3d_shader_message_context *message_context) ++static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *source_name, ++ struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) + { +- const struct shader_signature *output_signature = &sm6->p.shader_desc.output_signature; +- const struct shader_signature *input_signature = &sm6->p.shader_desc.input_signature; ++ const struct 
shader_signature *output_signature = &sm6->p.program.output_signature; ++ const struct shader_signature *input_signature = &sm6->p.program.input_signature; ++ size_t count, length, function_count, byte_code_size = dxbc_desc->byte_code_size; + const struct vkd3d_shader_location location = {.source_name = source_name}; + uint32_t version_token, dxil_version, token_count, magic; ++ const uint32_t *byte_code = dxbc_desc->byte_code; + unsigned int chunk_offset, chunk_size; +- size_t count, length, function_count; + enum bitcode_block_abbreviation abbr; + struct vkd3d_shader_version version; + struct dxil_block *block; +@@ -8181,6 +8263,11 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + sm6->ptr = &sm6->start[1]; + sm6->bitpos = 2; + ++ sm6->p.program.input_signature = dxbc_desc->input_signature; ++ sm6->p.program.output_signature = dxbc_desc->output_signature; ++ sm6->p.program.patch_constant_signature = dxbc_desc->patch_constant_signature; ++ memset(dxbc_desc, 0, sizeof(*dxbc_desc)); ++ + block = &sm6->root_block; + if ((ret = dxil_block_init(block, NULL, sm6)) < 0) + { +@@ -8351,7 +8438,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t + int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) + { +- struct vkd3d_shader_desc *shader_desc; ++ struct dxbc_shader_desc dxbc_desc = {0}; + uint32_t *byte_code = NULL; + struct sm6_parser *sm6; + int ret; +@@ -8364,31 +8451,33 @@ int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compi + return VKD3D_ERROR_OUT_OF_MEMORY; + } + +- shader_desc = &sm6->p.shader_desc; +- shader_desc->is_dxil = true; ++ dxbc_desc.is_dxil = true; + if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, +- shader_desc)) < 0) ++ &dxbc_desc)) < 0) + { + WARN("Failed to extract shader, 
vkd3d result %d.\n", ret); + vkd3d_free(sm6); + return ret; + } + +- sm6->p.shader_desc = *shader_desc; +- shader_desc = &sm6->p.shader_desc; +- +- if (((uintptr_t)shader_desc->byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) ++ if (((uintptr_t)dxbc_desc.byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) + { + /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC + * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ +- if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) +- ERR("Failed to allocate aligned chunk. Unaligned access will occur.\n"); +- else +- memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); ++ if (!(byte_code = vkd3d_malloc(align(dxbc_desc.byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) ++ { ++ ERR("Failed to allocate aligned chunk.\n"); ++ free_dxbc_shader_desc(&dxbc_desc); ++ vkd3d_free(sm6); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ memcpy(byte_code, dxbc_desc.byte_code, dxbc_desc.byte_code_size); ++ dxbc_desc.byte_code = byte_code; + } + +- ret = sm6_parser_init(sm6, byte_code ? 
byte_code : shader_desc->byte_code, shader_desc->byte_code_size, +- compile_info->source_name, message_context); ++ ret = sm6_parser_init(sm6, compile_info->source_name, message_context, &dxbc_desc); ++ free_dxbc_shader_desc(&dxbc_desc); + vkd3d_free(byte_code); + + if (!sm6->p.failed && ret >= 0) +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index fd5c8443221..fdf132e9ed4 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -64,6 +64,7 @@ struct fx_write_context_ops + uint32_t (*write_type)(const struct hlsl_type *type, struct fx_write_context *fx); + void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); + void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); ++ bool are_child_effects_supported; + }; + + struct fx_write_context +@@ -84,8 +85,12 @@ struct fx_write_context + uint32_t buffer_count; + uint32_t numeric_variable_count; + uint32_t object_variable_count; ++ uint32_t shared_object_count; ++ uint32_t shader_variable_count; + int status; + ++ bool child_effect; ++ + const struct fx_write_context_ops *ops; + }; + +@@ -174,6 +179,8 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co + + rb_init(&fx->strings, string_storage_compare); + list_init(&fx->types); ++ ++ fx->child_effect = fx->ops->are_child_effects_supported && ctx->child_effect; + } + + static int fx_write_context_cleanup(struct fx_write_context *fx) +@@ -320,19 +327,14 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, + return value; + } + +-static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) ++static const char * get_fx_4_type_name(const struct hlsl_type *type) + { +- struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; +- uint32_t name_offset, offset, size, stride, numeric_desc; +- uint32_t elements_count = 0; +- const char *name; +- static const uint32_t 
variable_type[] = ++ static const char * const object_type_names[] = + { +- [HLSL_CLASS_SCALAR] = 1, +- [HLSL_CLASS_VECTOR] = 1, +- [HLSL_CLASS_MATRIX] = 1, +- [HLSL_CLASS_OBJECT] = 2, +- [HLSL_CLASS_STRUCT] = 3, ++ [HLSL_TYPE_PIXELSHADER] = "PixelShader", ++ [HLSL_TYPE_VERTEXSHADER] = "VertexShader", ++ [HLSL_TYPE_RENDERTARGETVIEW] = "RenderTargetView", ++ [HLSL_TYPE_DEPTHSTENCILVIEW] = "DepthStencilView", + }; + static const char * const texture_type_names[] = + { +@@ -357,6 +359,39 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + [HLSL_SAMPLER_DIM_BUFFER] = "RWBuffer", + [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer", + }; ++ ++ if (type->base_type == HLSL_TYPE_TEXTURE) ++ return texture_type_names[type->sampler_dim]; ++ ++ if (type->base_type == HLSL_TYPE_UAV) ++ return uav_type_names[type->sampler_dim]; ++ ++ switch (type->base_type) ++ { ++ case HLSL_TYPE_PIXELSHADER: ++ case HLSL_TYPE_VERTEXSHADER: ++ case HLSL_TYPE_RENDERTARGETVIEW: ++ case HLSL_TYPE_DEPTHSTENCILVIEW: ++ return object_type_names[type->base_type]; ++ default: ++ return type->name; ++ } ++} ++ ++static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) ++{ ++ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; ++ uint32_t name_offset, offset, size, stride, numeric_desc; ++ uint32_t elements_count = 0; ++ const char *name; ++ static const uint32_t variable_type[] = ++ { ++ [HLSL_CLASS_SCALAR] = 1, ++ [HLSL_CLASS_VECTOR] = 1, ++ [HLSL_CLASS_MATRIX] = 1, ++ [HLSL_CLASS_OBJECT] = 2, ++ [HLSL_CLASS_STRUCT] = 3, ++ }; + struct hlsl_ctx *ctx = fx->ctx; + + /* Resolve arrays to element type and number of elements. 
*/ +@@ -366,12 +401,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + type = hlsl_get_multiarray_element_type(type); + } + +- if (type->base_type == HLSL_TYPE_TEXTURE) +- name = texture_type_names[type->sampler_dim]; +- else if (type->base_type == HLSL_TYPE_UAV) +- name = uav_type_names[type->sampler_dim]; +- else +- name = type->name; ++ name = get_fx_4_type_name(type); + + name_offset = write_string(name, fx); + offset = put_u32_unaligned(buffer, name_offset); +@@ -424,6 +454,8 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + { + static const uint32_t object_type[] = + { ++ [HLSL_TYPE_PIXELSHADER] = 5, ++ [HLSL_TYPE_VERTEXSHADER] = 6, + [HLSL_TYPE_RENDERTARGETVIEW] = 19, + [HLSL_TYPE_DEPTHSTENCILVIEW] = 20, + }; +@@ -454,7 +486,9 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + switch (type->base_type) + { + case HLSL_TYPE_DEPTHSTENCILVIEW: ++ case HLSL_TYPE_PIXELSHADER: + case HLSL_TYPE_RENDERTARGETVIEW: ++ case HLSL_TYPE_VERTEXSHADER: + put_u32_unaligned(buffer, object_type[type->base_type]); + break; + case HLSL_TYPE_TEXTURE: +@@ -661,6 +695,7 @@ static const struct fx_write_context_ops fx_4_ops = + .write_type = write_fx_4_type, + .write_technique = write_fx_4_technique, + .write_pass = write_fx_4_pass, ++ .are_child_effects_supported = true, + }; + + static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) +@@ -695,9 +730,12 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write + + static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) + { ++ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); ++ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t semantic_offset, bind_point = ~0u; +- uint32_t name_offset, type_offset; ++ uint32_t 
name_offset, type_offset, i; ++ struct hlsl_ctx *ctx = fx->ctx; + + if (var->reg_reservation.reg_type) + bind_point = var->reg_reservation.reg_index; +@@ -712,8 +750,35 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ + semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ + put_u32(buffer, bind_point); /* Explicit bind point */ + ++ if (fx->child_effect && var->storage_modifiers & HLSL_STORAGE_SHARED) ++ { ++ ++fx->shared_object_count; ++ return; ++ } ++ ++ /* Initializer */ ++ switch (type->base_type) ++ { ++ case HLSL_TYPE_TEXTURE: ++ case HLSL_TYPE_UAV: ++ case HLSL_TYPE_RENDERTARGETVIEW: ++ break; ++ case HLSL_TYPE_PIXELSHADER: ++ case HLSL_TYPE_VERTEXSHADER: ++ /* FIXME: write shader blobs, once parser support works. */ ++ for (i = 0; i < elements_count; ++i) ++ put_u32(buffer, 0); ++ ++fx->shader_variable_count; ++ break; ++ default: ++ hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.\n", ++ type->base_type); ++ } ++ + put_u32(buffer, 0); /* Annotations count */ + /* FIXME: write annotations */ ++ ++ ++fx->object_variable_count; + } + + static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx) +@@ -734,7 +799,8 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx + bind_point = b->reservation.reg_index; + if (b->type == HLSL_BUFFER_TEXTURE) + flags |= IS_TBUFFER; +- /* FIXME: set 'single' flag for fx_5_0 */ ++ if (ctx->profile->major_version == 5 && b->modifiers & HLSL_MODIFIER_SINGLE) ++ flags |= IS_SINGLE; + + name_offset = write_string(b->name, fx); + +@@ -806,21 +872,23 @@ static bool is_object_variable(const struct hlsl_ir_var *var) + } + } + +-static void write_objects(struct fx_write_context *fx) ++static void write_objects(struct fx_write_context *fx, bool shared) + { + struct hlsl_ir_var *var; +- uint32_t count = 0; ++ ++ if (shared && !fx->child_effect) ++ return; + + LIST_FOR_EACH_ENTRY(var, 
&fx->ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!is_object_variable(var)) + continue; + ++ if (fx->child_effect && (shared != !!(var->storage_modifiers & HLSL_STORAGE_SHARED))) ++ continue; ++ + write_fx_4_object_variable(var, fx); +- ++count; + } +- +- fx->object_variable_count += count; + } + + static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -834,9 +902,9 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&fx.unstructured, 0); /* Empty string placeholder. */ + + write_buffers(&fx); +- write_objects(&fx); ++ write_objects(&fx, false); + /* TODO: shared buffers */ +- /* TODO: shared objects */ ++ write_objects(&fx, true); + + write_techniques(ctx->globals, &fx); + +@@ -846,7 +914,7 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&buffer, fx.object_variable_count); /* Object variable count. */ + put_u32(&buffer, 0); /* Pool buffer count. */ + put_u32(&buffer, 0); /* Pool variable count. */ +- put_u32(&buffer, 0); /* Pool object count. */ ++ put_u32(&buffer, fx.shared_object_count); /* Shared object count. */ + put_u32(&buffer, fx.technique_count); + size_offset = put_u32(&buffer, 0); /* Unstructured size. */ + put_u32(&buffer, 0); /* String count. */ +@@ -857,7 +925,7 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&buffer, 0); /* Sampler state count. */ + put_u32(&buffer, 0); /* Rendertarget view count. */ + put_u32(&buffer, 0); /* Depth stencil view count. */ +- put_u32(&buffer, 0); /* Shader count. */ ++ put_u32(&buffer, fx.shader_variable_count); /* Shader count. */ + put_u32(&buffer, 0); /* Inline shader count. */ + + set_u32(&buffer, size_offset, fx.unstructured.size); +@@ -893,7 +961,7 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&fx.unstructured, 0); /* Empty string placeholder. 
*/ + + write_buffers(&fx); +- write_objects(&fx); ++ write_objects(&fx, false); + /* TODO: interface variables */ + + write_groups(&fx); +@@ -915,7 +983,7 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&buffer, 0); /* Sampler state count. */ + put_u32(&buffer, 0); /* Rendertarget view count. */ + put_u32(&buffer, 0); /* Depth stencil view count. */ +- put_u32(&buffer, 0); /* Shader count. */ ++ put_u32(&buffer, fx.shader_variable_count); /* Shader count. */ + put_u32(&buffer, 0); /* Inline shader count. */ + put_u32(&buffer, fx.group_count); /* Group count. */ + put_u32(&buffer, 0); /* UAV count. */ +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index c234caf8275..7c630d181ef 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -27,22 +27,6 @@ struct vkd3d_glsl_generator + bool failed; + }; + +-struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, +- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) +-{ +- struct vkd3d_glsl_generator *generator; +- +- if (!(generator = vkd3d_malloc(sizeof(*generator)))) +- return NULL; +- +- memset(generator, 0, sizeof(*generator)); +- generator->version = *version; +- vkd3d_string_buffer_init(&generator->buffer); +- generator->location = *location; +- generator->message_context = message_context; +- return generator; +-} +- + static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( + struct vkd3d_glsl_generator *generator, + enum vkd3d_shader_error error, const char *fmt, ...) 
+@@ -73,6 +57,8 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, + static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator, + const struct vkd3d_shader_instruction *instruction) + { ++ generator->location = instruction->location; ++ + switch (instruction->handler_idx) + { + case VKD3DSIH_DCL_INPUT: +@@ -90,7 +76,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator + } + } + +-int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, ++static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, + struct vsir_program *program, struct vkd3d_shader_code *out) + { + unsigned int i; +@@ -101,10 +87,8 @@ int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, + vkd3d_string_buffer_printf(&generator->buffer, "#version 440\n\n"); + vkd3d_string_buffer_printf(&generator->buffer, "void main()\n{\n"); + +- generator->location.column = 0; + for (i = 0; i < program->instructions.count; ++i) + { +- generator->location.line = i + 1; + vkd3d_glsl_handle_instruction(generator, &program->instructions.elements[i]); + } + +@@ -124,8 +108,29 @@ int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, + return VKD3D_OK; + } + +-void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator) ++static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) ++{ ++ vkd3d_string_buffer_cleanup(&gen->buffer); ++} ++ ++static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_version *version, struct vkd3d_shader_message_context *message_context) + { +- vkd3d_string_buffer_cleanup(&generator->buffer); +- vkd3d_free(generator); ++ memset(gen, 0, sizeof(*gen)); ++ gen->version = *version; ++ vkd3d_string_buffer_init(&gen->buffer); ++ gen->message_context = message_context; ++} ++ ++int glsl_compile(struct vsir_program *program, struct vkd3d_shader_code *out, ++ struct 
vkd3d_shader_message_context *message_context) ++{ ++ struct vkd3d_glsl_generator generator; ++ int ret; ++ ++ vkd3d_glsl_generator_init(&generator, &program->shader_version, message_context); ++ ret = vkd3d_glsl_generator_generate(&generator, program, out); ++ vkd3d_glsl_generator_cleanup(&generator); ++ ++ return ret; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index c315000a6e9..2820b9abf67 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -3597,6 +3597,10 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil + { + ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; + } ++ else if (option->name == VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT) ++ { ++ ctx->child_effect = !!option->value; ++ } + } + + return true; +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 49858869e7d..681f2edce31 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -374,6 +374,7 @@ struct hlsl_attribute + #define HLSL_STORAGE_CENTROID 0x00004000 + #define HLSL_STORAGE_NOPERSPECTIVE 0x00008000 + #define HLSL_STORAGE_LINEAR 0x00010000 ++#define HLSL_MODIFIER_SINGLE 0x00020000 + + #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ + HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ +@@ -922,6 +923,7 @@ struct hlsl_ctx + uint32_t found_numthreads : 1; + + bool semantic_compat_mapping; ++ bool child_effect; + }; + + struct hlsl_resource_load_params +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +index 8dcceb94c1c..600e2cf2c6a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +@@ -109,7 +109,6 @@ packoffset {return KW_PACKOFFSET; } + pass {return KW_PASS; } + PixelShader {return KW_PIXELSHADER; } + pixelshader {return 
KW_PIXELSHADER; } +-precise {return KW_PRECISE; } + RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } + RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } + RasterizerOrderedTexture1D {return KW_RASTERIZERORDEREDTEXTURE1D; } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index b484a952497..ec8b3d22af2 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -5363,7 +5363,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_PACKOFFSET + %token KW_PASS + %token KW_PIXELSHADER +-%token KW_PRECISE + %token KW_RASTERIZERORDEREDBUFFER + %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER + %token KW_RASTERIZERORDEREDTEXTURE1D +@@ -6542,6 +6541,14 @@ type_no_void: + { + $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); + } ++ | KW_VERTEXSHADER ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "VertexShader", true, true); ++ } ++ | KW_PIXELSHADER ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); ++ } + + type: + type_no_void +@@ -6815,10 +6822,6 @@ var_modifiers: + { + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOPERSPECTIVE, &@1); + } +- | KW_PRECISE var_modifiers +- { +- $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); +- } + | KW_SHARED var_modifiers + { + $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, &@1); +@@ -6867,7 +6870,16 @@ var_modifiers: + { + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); + } +- ++ | var_identifier var_modifiers ++ { ++ if (!strcmp($1, "precise")) ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); ++ else if (!strcmp($1, "single")) ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SINGLE, &@1); ++ else ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER, ++ "Unknown modifier %s.", debugstr_a($1)); ++ } + + complex_initializer: + initializer_expr +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c 
b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 8434a921a62..9eb65dc0170 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -2948,7 +2948,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + } + else if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + { +- struct hlsl_ir_node *neg, *slt, *sum, *mul, *cond2; ++ struct hlsl_ir_node *neg, *slt, *sum, *cond2, *slt_cast, *mul; + + /* Expression used here is "slt() * (first - second) + second". */ + +@@ -2980,7 +2980,11 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + return false; + hlsl_block_add_instr(block, sum); + +- if (!(mul = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, slt, sum))) ++ if (!(slt_cast = hlsl_new_cast(ctx, slt, sum->data_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, slt_cast); ++ ++ if (!(mul = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, slt_cast, sum))) + return false; + hlsl_block_add_instr(block, mul); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 3c862f33ef1..759c89957d6 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -32,6 +32,9 @@ void vsir_program_cleanup(struct vsir_program *program) + vkd3d_free((void *)program->block_names[i]); + vkd3d_free(program->block_names); + shader_instruction_array_destroy(&program->instructions); ++ shader_signature_cleanup(&program->input_signature); ++ shader_signature_cleanup(&program->output_signature); ++ shader_signature_cleanup(&program->patch_constant_signature); + } + + static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) +@@ -230,7 +233,7 @@ static const struct vkd3d_shader_varying_map *find_varying_map( + static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info 
*compile_info) + { +- struct shader_signature *signature = &parser->shader_desc.output_signature; ++ struct shader_signature *signature = &parser->program.output_signature; + const struct vkd3d_shader_varying_map_info *varying_map; + unsigned int i; + +@@ -1394,9 +1397,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse + normaliser.phase = VKD3DSIH_INVALID; + normaliser.shader_type = program->shader_version.type; + normaliser.major = program->shader_version.major; +- normaliser.input_signature = &parser->shader_desc.input_signature; +- normaliser.output_signature = &parser->shader_desc.output_signature; +- normaliser.patch_constant_signature = &parser->shader_desc.patch_constant_signature; ++ normaliser.input_signature = &program->input_signature; ++ normaliser.output_signature = &program->output_signature; ++ normaliser.patch_constant_signature = &program->patch_constant_signature; + + for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i) + { +@@ -1439,9 +1442,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse + } + } + +- if (!shader_signature_merge(&parser->shader_desc.input_signature, normaliser.input_range_map, false) +- || !shader_signature_merge(&parser->shader_desc.output_signature, normaliser.output_range_map, false) +- || !shader_signature_merge(&parser->shader_desc.patch_constant_signature, normaliser.pc_range_map, true)) ++ if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false) ++ || !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false) ++ || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true)) + { + program->instructions = normaliser.instructions; + return VKD3D_ERROR_OUT_OF_MEMORY; +@@ -3065,7 +3068,7 @@ static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t id + + struct vsir_block + { +- unsigned int label; ++ unsigned int 
label, order_pos; + /* `begin' points to the instruction immediately following the + * LABEL that introduces the block. `end' points to the terminator + * instruction (either BRANCH or RET). They can coincide, meaning +@@ -3140,6 +3143,37 @@ struct vsir_cfg + size_t *loops_by_header; + + struct vsir_block_list order; ++ struct cfg_loop_interval ++ { ++ /* `begin' is the position of the first block of the loop in ++ * the topological sort; `end' is the position of the first ++ * block after the loop. In other words, `begin' is where a ++ * `continue' instruction would jump and `end' is where a ++ * `break' instruction would jump. */ ++ unsigned int begin, end; ++ /* Each loop interval can be natural or synthetic. Natural ++ * intervals are added to represent loops given by CFG back ++ * edges. Synthetic intervals do not correspond to loops in ++ * the input CFG, but are added to leverage their `break' ++ * instruction in order to execute forward edges. ++ * ++ * For a synthetic loop interval it's not really important ++ * which one is the `begin' block, since we don't need to ++ * execute `continue' for them. So we have some leeway for ++ * moving it provided that these conditions are met: 1. the ++ * interval must contain all `break' instructions that target ++ * it, which in practice means that `begin' can be moved ++ * backward and not forward; 2. intervals must remain properly ++ * nested (for each pair of intervals, either one contains the ++ * other or they are disjoint). ++ * ++ * Subject to these conditions, we try to reuse the same loop ++ * as much as possible (if many forward edges target the same ++ * block), but we still try to keep `begin' as forward as ++ * possible, to keep the loop scope as small as possible. 
*/ ++ bool synthetic; ++ } *loop_intervals; ++ size_t loop_interval_count, loop_interval_capacity; + }; + + static void vsir_cfg_cleanup(struct vsir_cfg *cfg) +@@ -3157,11 +3191,30 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) + vkd3d_free(cfg->blocks); + vkd3d_free(cfg->loops); + vkd3d_free(cfg->loops_by_header); ++ vkd3d_free(cfg->loop_intervals); + + if (TRACE_ON()) + vkd3d_string_buffer_cleanup(&cfg->debug_buffer); + } + ++static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin, ++ unsigned int end, bool synthetic) ++{ ++ struct cfg_loop_interval *interval; ++ ++ if (!vkd3d_array_reserve((void **)&cfg->loop_intervals, &cfg->loop_interval_capacity, ++ cfg->loop_interval_count + 1, sizeof(*cfg->loop_intervals))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ interval = &cfg->loop_intervals[cfg->loop_interval_count++]; ++ ++ interval->begin = begin; ++ interval->end = end; ++ interval->synthetic = synthetic; ++ ++ return VKD3D_OK; ++} ++ + static bool vsir_block_dominates(struct vsir_block *b1, struct vsir_block *b2) + { + return bitmap_is_set(b1->dominates, b2->label - 1); +@@ -3396,14 +3449,14 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) + * block without passing through the header block) belong to the same + * loop. + * +- * If the input CFG is reducible, each two loops are either disjoint +- * or one is a strict subset of the other, provided that each block +- * has at most one incoming back edge. If this condition does not +- * hold, a synthetic block can be introduced as the only back edge +- * block for the given header block, with all the previous back edge +- * now being forward edges to the synthetic block. This is not +- * currently implemented (but it is rarely found in practice +- * anyway). 
*/ ++ * If the input CFG is reducible its loops are properly nested (i.e., ++ * each two loops are either disjoint or one is contained in the ++ * other), provided that each block has at most one incoming back ++ * edge. If this condition does not hold, a synthetic block can be ++ * introduced as the only back edge block for the given header block, ++ * with all the previous back edge now being forward edges to the ++ * synthetic block. This is not currently implemented (but it is ++ * rarely found in practice anyway). */ + static enum vkd3d_result vsir_cfg_scan_loop(struct vsir_block_list *loop, struct vsir_block *block, + struct vsir_block *header) + { +@@ -3496,6 +3549,7 @@ struct vsir_cfg_node_sorter + { + struct vsir_block_list *loop; + unsigned int seen_count; ++ unsigned int begin; + } *stack; + size_t stack_count, stack_capacity; + struct vsir_block_list available_blocks; +@@ -3522,6 +3576,7 @@ static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cf + item = &sorter->stack[sorter->stack_count++]; + item->loop = loop; + item->seen_count = 0; ++ item->begin = sorter->cfg->order.count; + + return VKD3D_OK; + } +@@ -3628,6 +3683,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) + } + + vsir_block_list_remove_index(&sorter.available_blocks, i); ++ block->order_pos = cfg->order.count; + if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) + goto fail; + +@@ -3646,6 +3702,10 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) + if (inner_stack_item->seen_count != inner_stack_item->loop->count) + break; + ++ if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->begin, ++ cfg->order.count, false)) < 0) ++ goto fail; ++ + new_seen_count = inner_stack_item->loop->count; + --sorter.stack_count; + } +@@ -3706,6 +3766,143 @@ fail: + return ret; + } + ++/* Sort loop intervals first by ascending begin time and then by ++ * descending end time, so that inner intervals appear after outer ++ * 
ones and disjoint intervals appear in their proper order. */ ++static int compare_loop_intervals(const void *ptr1, const void *ptr2) ++{ ++ const struct cfg_loop_interval *interval1 = ptr1; ++ const struct cfg_loop_interval *interval2 = ptr2; ++ ++ if (interval1->begin != interval2->begin) ++ return vkd3d_u32_compare(interval1->begin, interval2->begin); ++ ++ return -vkd3d_u32_compare(interval1->end, interval2->end); ++} ++ ++static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg) ++{ ++ enum vkd3d_result ret; ++ size_t i, j, k; ++ ++ for (i = 0; i < cfg->block_count; ++i) ++ { ++ struct vsir_block *block = &cfg->blocks[i]; ++ ++ if (block->label == 0) ++ continue; ++ ++ for (j = 0; j < block->successors.count; ++j) ++ { ++ struct vsir_block *successor = block->successors.blocks[j]; ++ struct cfg_loop_interval *extend = NULL; ++ unsigned int begin; ++ enum ++ { ++ ACTION_DO_NOTHING, ++ ACTION_CREATE_NEW, ++ ACTION_EXTEND, ++ } action = ACTION_CREATE_NEW; ++ ++ /* We've already constructed loop intervals for the back ++ * edges, there's nothing more to do. */ ++ if (vsir_block_dominates(successor, block)) ++ continue; ++ ++ assert(block->order_pos < successor->order_pos); ++ ++ /* Jumping from a block to the following one is always ++ * possible, so nothing to do. */ ++ if (block->order_pos + 1 == successor->order_pos) ++ continue; ++ ++ /* Let's look for a loop interval that already breaks at ++ * `successor' and either contains or can be extended to ++ * contain `block'.
*/ ++ for (k = 0; k < cfg->loop_interval_count; ++k) ++ { ++ struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; ++ ++ if (interval->end != successor->order_pos) ++ continue; ++ ++ if (interval->begin <= block->order_pos) ++ { ++ action = ACTION_DO_NOTHING; ++ break; ++ } ++ ++ if (interval->synthetic) ++ { ++ action = ACTION_EXTEND; ++ extend = interval; ++ break; ++ } ++ } ++ ++ if (action == ACTION_DO_NOTHING) ++ continue; ++ ++ /* Ok, we have to decide where the new or replacing ++ * interval has to begin. These are the rules: 1. it must ++ * begin before `block'; 2. intervals must be properly ++ * nested; 3. the new interval should begin as late as ++ * possible, to limit control flow depth and extension. */ ++ begin = block->order_pos; ++ ++ /* Our candidate interval is always [begin, ++ * successor->order_pos), and we move `begin' backward ++ * until the candidate interval contains all the intervals ++ * whose endpoint lies in the candidate interval ++ * itself. */ ++ for (k = 0; k < cfg->loop_interval_count; ++k) ++ { ++ struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; ++ ++ if (begin < interval->end && interval->end < successor->order_pos) ++ begin = min(begin, interval->begin); ++ } ++ ++ /* Now we have to care about the intervals whose begin ++ * point lies in the candidate interval. We cannot move ++ * the candidate interval endpoint, because it is ++ * important that the loop break target matches ++ * `successor'. So we have to move that interval's begin ++ * point to the begin point of the candidate interval, ++ * i.e. `begin'. But what if the interval we should extend ++ * backward is not synthetic? This cannot happen, ++ * fortunately, because it would mean that there is a jump ++ * entering a loop via a block which is not the loop ++ * header, so the CFG would not be reducible.
*/ ++ for (k = 0; k < cfg->loop_interval_count; ++k) ++ { ++ struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; ++ ++ if (interval->begin < successor->order_pos && successor->order_pos < interval->end) ++ { ++ if (interval->synthetic) ++ interval->begin = min(begin, interval->begin); ++ assert(begin >= interval->begin); ++ } ++ } ++ ++ if (action == ACTION_EXTEND) ++ extend->begin = begin; ++ else if ((ret = vsir_cfg_add_loop_interval(cfg, begin, successor->order_pos, true)) < 0) ++ return ret; ++ } ++ } ++ ++ qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); ++ ++ if (TRACE_ON()) ++ for (i = 0; i < cfg->loop_interval_count; ++i) ++ TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ? "Synthetic" : "Natural", ++ cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); ++ ++ return VKD3D_OK; ++} ++ + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info) + { +@@ -3717,7 +3914,7 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + if ((result = instruction_array_lower_texkills(parser)) < 0) + return result; + +- if (parser->shader_desc.is_dxil) ++ if (parser->program.shader_version.major >= 6) + { + struct vsir_cfg cfg; + +@@ -3744,6 +3941,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + return result; + } + ++ if ((result = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) ++ { ++ vsir_cfg_cleanup(&cfg); ++ return result; ++ } ++ + if ((result = simple_structurizer_run(parser)) < 0) + { + vsir_cfg_cleanup(&cfg); +@@ -3766,7 +3969,7 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + return result; + + if ((result = instruction_array_normalise_hull_shader_control_point_io(instructions, +- &parser->shader_desc.input_signature)) < 0) ++ &parser->program.input_signature)) < 0) + return result; + } + +diff 
--git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 5856f2f04ba..0568407f997 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -223,6 +223,11 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d + } + } + ++static bool data_type_is_floating_point(enum vkd3d_data_type data_type) ++{ ++ return data_type == VKD3D_DATA_HALF || data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; ++} ++ + #define VKD3D_SPIRV_VERSION 0x00010000 + #define VKD3D_SPIRV_GENERATOR_ID 18 + #define VKD3D_SPIRV_GENERATOR_VERSION 11 +@@ -1838,6 +1843,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder + { + switch (data_type) + { ++ case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ + case VKD3D_DATA_FLOAT: + case VKD3D_DATA_SNORM: + case VKD3D_DATA_UNORM: +@@ -1845,6 +1851,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder + break; + case VKD3D_DATA_INT: + case VKD3D_DATA_UINT: ++ case VKD3D_DATA_UINT16: /* Minimum precision. 
TODO: native 16-bit */ + return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); + break; + case VKD3D_DATA_DOUBLE: +@@ -2444,14 +2451,14 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) + vkd3d_free(compiler); + } + +-static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, +- struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, ++static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *program, ++ const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location, + uint64_t config_flags) + { +- const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; +- const struct shader_signature *output_signature = &shader_desc->output_signature; ++ const struct shader_signature *patch_constant_signature = &program->patch_constant_signature; ++ const struct shader_signature *output_signature = &program->output_signature; + const struct vkd3d_shader_interface_info *shader_interface; + const struct vkd3d_shader_descriptor_offset_info *offset_info; + const struct vkd3d_shader_spirv_target_info *target_info; +@@ -2562,7 +2569,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve + + rb_init(&compiler->symbol_table, vkd3d_symbol_compare); + +- compiler->shader_type = shader_version->type; ++ compiler->shader_type = program->shader_version.type; + + if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) + { +@@ -3753,6 +3760,70 @@ static uint32_t spirv_compiler_emit_bool_to_double(struct spirv_compiler *compil + return vkd3d_spirv_build_op_select(builder, type_id, val_id, true_id, false_id); + } + ++/* Based on the implementation in the OpenGL Mathematics library. 
*/ ++static uint32_t half_to_float(uint16_t value) ++{ ++ uint32_t s = (value & 0x8000u) << 16; ++ uint32_t e = (value >> 10) & 0x1fu; ++ uint32_t m = value & 0x3ffu; ++ ++ if (!e) ++ { ++ if (!m) ++ { ++ /* Plus or minus zero */ ++ return s; ++ } ++ else ++ { ++ /* Denormalized number -- renormalize it */ ++ ++ while (!(m & 0x400u)) ++ { ++ m <<= 1; ++ --e; ++ } ++ ++ ++e; ++ m &= ~0x400u; ++ } ++ } ++ else if (e == 31u) ++ { ++ /* Positive or negative infinity for zero 'm'. ++ * Nan for non-zero 'm' -- preserve sign and significand bits */ ++ return s | 0x7f800000u | (m << 13); ++ } ++ ++ /* Normalized number */ ++ e += 127u - 15u; ++ m <<= 13; ++ ++ /* Assemble s, e and m. */ ++ return s | (e << 23) | m; ++} ++ ++static uint32_t convert_raw_constant32(enum vkd3d_data_type data_type, unsigned int uint_value) ++{ ++ int16_t i; ++ ++ /* TODO: native 16-bit support. */ ++ if (data_type != VKD3D_DATA_UINT16 && data_type != VKD3D_DATA_HALF) ++ return uint_value; ++ ++ if (data_type == VKD3D_DATA_HALF) ++ return half_to_float(uint_value); ++ ++ /* Values in DXIL have no signedness, so it is ambiguous whether 16-bit constants should or ++ * should not be sign-extended when 16-bit execution is not supported. The AMD RX 580 Windows ++ * driver has no 16-bit support, and sign-extends all 16-bit constant ints to 32 bits. These ++ * results differ from SM 5. The RX 6750 XT supports 16-bit execution, so constants are not ++ * extended, and results match SM 5. It seems best to replicate the sign-extension, and if ++ * execution is 16-bit, the values will be truncated. 
*/ ++ i = uint_value; ++ return (int32_t)i; ++} ++ + static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compiler, + const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) + { +@@ -3765,14 +3836,15 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile + if (reg->dimension == VSIR_DIMENSION_SCALAR) + { + for (i = 0; i < component_count; ++i) +- values[i] = *reg->u.immconst_u32; ++ values[i] = convert_raw_constant32(reg->data_type, reg->u.immconst_u32[0]); + } + else + { + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) +- values[j++] = reg->u.immconst_u32[vsir_swizzle_get_component(swizzle, i)]; ++ values[j++] = convert_raw_constant32(reg->data_type, ++ reg->u.immconst_u32[vsir_swizzle_get_component(swizzle, i)]); + } + } + +@@ -3916,6 +3988,13 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil + + switch (icb->data_type) + { ++ case VKD3D_DATA_HALF: ++ case VKD3D_DATA_UINT16: ++ /* Scalar only. 
*/ ++ for (i = 0; i < element_count; ++i) ++ elements[i] = vkd3d_spirv_get_op_constant(builder, elem_type_id, ++ convert_raw_constant32(icb->data_type, icb->data[i])); ++ break; + case VKD3D_DATA_FLOAT: + case VKD3D_DATA_INT: + case VKD3D_DATA_UINT: +@@ -4104,7 +4183,7 @@ static uint32_t spirv_compiler_emit_abs(struct spirv_compiler *compiler, + uint32_t type_id; + + type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); +- if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) ++ if (data_type_is_floating_point(reg->data_type)) + return vkd3d_spirv_build_op_glsl_std450_fabs(builder, type_id, val_id); + + FIXME("Unhandled data type %#x.\n", reg->data_type); +@@ -4118,7 +4197,7 @@ static uint32_t spirv_compiler_emit_neg(struct spirv_compiler *compiler, + uint32_t type_id; + + type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); +- if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) ++ if (data_type_is_floating_point(reg->data_type)) + return vkd3d_spirv_build_op_fnegate(builder, type_id, val_id); + else if (data_type_is_integer(reg->data_type)) + return vkd3d_spirv_build_op_snegate(builder, type_id, val_id); +@@ -4302,7 +4381,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, + } + + type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); +- if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) ++ if (data_type_is_floating_point(reg->data_type)) + return vkd3d_spirv_build_op_glsl_std450_nclamp(builder, type_id, val_id, zero_id, one_id); + + FIXME("Unhandled data type %#x.\n", reg->data_type); +@@ -6903,7 +6982,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, + assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); + + val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); +- if (dst->reg.data_type == VKD3D_DATA_FLOAT) ++ if 
(dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) + { + val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); + } +@@ -6912,7 +6991,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, + /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. */ + val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); + } +- else if (dst->reg.data_type == VKD3D_DATA_UINT) ++ else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) + { + val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); + } +@@ -7205,8 +7284,15 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, + type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); + + if (src[0].reg.data_type != VKD3D_DATA_BOOL) +- condition_id = spirv_compiler_emit_int_to_bool(compiler, +- VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, component_count, condition_id); ++ { ++ if (instruction->handler_idx == VKD3DSIH_CMP) ++ condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, ++ vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, ++ spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); ++ else ++ condition_id = spirv_compiler_emit_int_to_bool(compiler, ++ VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, component_count, condition_id); ++ } + val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, src1_id, src2_id); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); +@@ -7738,6 +7824,36 @@ static void spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *c + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } + ++static void 
spirv_compiler_emit_float_comparison_instruction(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t src0_id, src1_id, type_id, result_id; ++ unsigned int component_count; ++ SpvOp op; ++ ++ switch (instruction->handler_idx) ++ { ++ case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; ++ case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ component_count = vsir_write_mask_component_count(dst->write_mask); ++ ++ src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); ++ src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); ++ ++ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); ++ result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, src0_id, src1_id); ++ ++ result_id = spirv_compiler_emit_bool_to_float(compiler, component_count, result_id, false); ++ spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, result_id); ++} ++ + static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction, uint32_t target_block_id) + { +@@ -9628,6 +9744,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + break; + case VKD3DSIH_DMOVC: + case VKD3DSIH_MOVC: ++ case VKD3DSIH_CMP: + spirv_compiler_emit_movc(compiler, instruction); + break; + case VKD3DSIH_SWAPC: +@@ -9752,6 +9869,10 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_UNO: + spirv_compiler_emit_orderedness_instruction(compiler, instruction); + break; ++ case VKD3DSIH_SLT: ++ case VKD3DSIH_SGE: ++ 
spirv_compiler_emit_float_comparison_instruction(compiler, instruction); ++ break; + case VKD3DSIH_BFI: + case VKD3DSIH_IBFE: + case VKD3DSIH_UBFE: +@@ -9982,7 +10103,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; + struct vkd3d_shader_instruction_array instructions; + struct vsir_program *program = &parser->program; + enum vkd3d_result result = VKD3D_OK; +@@ -10007,12 +10127,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + instructions = program->instructions; + memset(&program->instructions, 0, sizeof(program->instructions)); + +- compiler->input_signature = shader_desc->input_signature; +- compiler->output_signature = shader_desc->output_signature; +- compiler->patch_constant_signature = shader_desc->patch_constant_signature; +- memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); +- memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); +- memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); ++ compiler->input_signature = program->input_signature; ++ compiler->output_signature = program->output_signature; ++ compiler->patch_constant_signature = program->patch_constant_signature; ++ memset(&program->input_signature, 0, sizeof(program->input_signature)); ++ memset(&program->output_signature, 0, sizeof(program->output_signature)); ++ memset(&program->patch_constant_signature, 0, sizeof(program->patch_constant_signature)); + compiler->use_vocp = program->use_vocp; + compiler->block_names = program->block_names; + compiler->block_name_count = program->block_name_count; +@@ -10119,8 +10239,8 @@ int spirv_compile(struct 
vkd3d_shader_parser *parser, + struct spirv_compiler *spirv_compiler; + int ret; + +- if (!(spirv_compiler = spirv_compiler_create(&parser->program.shader_version, &parser->shader_desc, +- compile_info, scan_descriptor_info, message_context, &parser->location, parser->config_flags))) ++ if (!(spirv_compiler = spirv_compiler_create(&parser->program, compile_info, ++ scan_descriptor_info, message_context, &parser->location, parser->config_flags))) + { + ERR("Failed to create SPIR-V compiler.\n"); + return VKD3D_ERROR; +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index 3be4e40ab0c..bd558693b07 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -954,32 +954,32 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins + case VKD3DSPR_INCONTROLPOINT: + io_masks = priv->input_register_masks; + ranges = &priv->input_index_ranges; +- signature = &priv->p.shader_desc.input_signature; ++ signature = &priv->p.program.input_signature; + break; + case VKD3DSPR_OUTPUT: + if (sm4_parser_is_in_fork_or_join_phase(priv)) + { + io_masks = priv->patch_constant_register_masks; + ranges = &priv->patch_constant_index_ranges; +- signature = &priv->p.shader_desc.patch_constant_signature; ++ signature = &priv->p.program.patch_constant_signature; + } + else + { + io_masks = priv->output_register_masks; + ranges = &priv->output_index_ranges; +- signature = &priv->p.shader_desc.output_signature; ++ signature = &priv->p.program.output_signature; + } + break; + case VKD3DSPR_COLOROUT: + case VKD3DSPR_OUTCONTROLPOINT: + io_masks = priv->output_register_masks; + ranges = &priv->output_index_ranges; +- signature = &priv->p.shader_desc.output_signature; ++ signature = &priv->p.program.output_signature; + break; + case VKD3DSPR_PATCHCONST: + io_masks = priv->patch_constant_register_masks; + ranges = &priv->patch_constant_index_ranges; +- signature = 
&priv->p.shader_desc.patch_constant_signature; ++ signature = &priv->p.program.patch_constant_signature; + break; + + default: +@@ -1113,7 +1113,7 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u + if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, dst)) + { + struct signature_element *e = vsir_signature_find_element_for_reg( +- &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); ++ &priv->p.program.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + + e->interpolation_mode = ins->flags; + } +@@ -1128,7 +1128,7 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in + if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, dst)) + { + struct signature_element *e = vsir_signature_find_element_for_reg( +- &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); ++ &priv->p.program.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + + e->interpolation_mode = ins->flags; + } +@@ -1748,7 +1748,6 @@ static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) + struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); + + vsir_program_cleanup(&parser->program); +- free_shader_desc(&parser->shader_desc); + vkd3d_free(sm4); + } + +@@ -2504,7 +2503,7 @@ static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = + }; + + static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, +- size_t byte_code_size, const char *source_name, const struct shader_signature *output_signature, ++ size_t byte_code_size, const char *source_name, + struct vkd3d_shader_message_context *message_context) + { + struct vkd3d_shader_version version; +@@ -2648,9 +2647,9 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + struct 
vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) + { + struct vkd3d_shader_instruction_array *instructions; +- struct vkd3d_shader_desc *shader_desc; + struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_sm4_parser *sm4; ++ struct dxbc_shader_desc dxbc_desc = {0}; + int ret; + + if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) +@@ -2659,36 +2658,40 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + return VKD3D_ERROR_OUT_OF_MEMORY; + } + +- shader_desc = &sm4->p.shader_desc; +- shader_desc->is_dxil = false; ++ dxbc_desc.is_dxil = false; + if ((ret = shader_extract_from_dxbc(&compile_info->source, +- message_context, compile_info->source_name, shader_desc)) < 0) ++ message_context, compile_info->source_name, &dxbc_desc)) < 0) + { + WARN("Failed to extract shader, vkd3d result %d.\n", ret); + vkd3d_free(sm4); + return ret; + } + +- if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, +- compile_info->source_name, &shader_desc->output_signature, message_context)) ++ if (!shader_sm4_init(sm4, dxbc_desc.byte_code, dxbc_desc.byte_code_size, ++ compile_info->source_name, message_context)) + { + WARN("Failed to initialise shader parser.\n"); +- free_shader_desc(shader_desc); ++ free_dxbc_shader_desc(&dxbc_desc); + vkd3d_free(sm4); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + ++ sm4->p.program.input_signature = dxbc_desc.input_signature; ++ sm4->p.program.output_signature = dxbc_desc.output_signature; ++ sm4->p.program.patch_constant_signature = dxbc_desc.patch_constant_signature; ++ memset(&dxbc_desc, 0, sizeof(dxbc_desc)); ++ + /* DXBC stores used masks inverted for output signatures, for some reason. + * We return them un-inverted. 
*/ +- uninvert_used_masks(&shader_desc->output_signature); ++ uninvert_used_masks(&sm4->p.program.output_signature); + if (sm4->p.program.shader_version.type == VKD3D_SHADER_TYPE_HULL) +- uninvert_used_masks(&shader_desc->patch_constant_signature); ++ uninvert_used_masks(&sm4->p.program.patch_constant_signature); + +- if (!shader_sm4_parser_validate_signature(sm4, &shader_desc->input_signature, ++ if (!shader_sm4_parser_validate_signature(sm4, &sm4->p.program.input_signature, + sm4->input_register_masks, "Input") +- || !shader_sm4_parser_validate_signature(sm4, &shader_desc->output_signature, ++ || !shader_sm4_parser_validate_signature(sm4, &sm4->p.program.output_signature, + sm4->output_register_masks, "Output") +- || !shader_sm4_parser_validate_signature(sm4, &shader_desc->patch_constant_signature, ++ || !shader_sm4_parser_validate_signature(sm4, &sm4->p.program.patch_constant_signature, + sm4->patch_constant_register_masks, "Patch constant")) + { + shader_sm4_destroy(&sm4->p); +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index d128d84d6ea..1ac372f163c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -1450,11 +1450,11 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + + if (!ret && signature_info) + { +- if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) ++ if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->program.input_signature) + || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, +- &parser->shader_desc.output_signature) ++ &parser->program.output_signature) + || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, +- &parser->shader_desc.patch_constant_signature)) ++ &parser->program.patch_constant_signature)) + { + ret = 
VKD3D_ERROR_OUT_OF_MEMORY; + } +@@ -1553,7 +1553,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { + struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; +- struct vkd3d_glsl_generator *glsl_generator; ++ struct vsir_program *program = &parser->program; + struct vkd3d_shader_compile_info scan_info; + int ret; + +@@ -1562,23 +1562,13 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + switch (compile_info->target_type) + { + case VKD3D_SHADER_TARGET_D3D_ASM: +- ret = vkd3d_dxbc_binary_to_text(&parser->program, &parser->shader_desc, +- compile_info, out, VSIR_ASM_FLAG_NONE); ++ ret = d3d_asm_compile(program, compile_info, out, VSIR_ASM_FLAG_NONE); + break; + + case VKD3D_SHADER_TARGET_GLSL: + if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) + return ret; +- if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->program.shader_version, +- message_context, &parser->location))) +- { +- ERR("Failed to create GLSL generator.\n"); +- vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); +- return VKD3D_ERROR; +- } +- +- ret = vkd3d_glsl_generator_generate(glsl_generator, &parser->program, out); +- vkd3d_glsl_generator_destroy(glsl_generator); ++ ret = glsl_compile(program, out, message_context); + vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + break; + +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index d257d953dd5..98c311b3655 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -149,6 +149,7 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027, + VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, + VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, ++ 
VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, + + VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, + VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, +@@ -623,14 +624,16 @@ enum vkd3d_data_type + VKD3D_DATA_UINT8, + VKD3D_DATA_UINT64, + VKD3D_DATA_BOOL, ++ VKD3D_DATA_UINT16, ++ VKD3D_DATA_HALF, + + VKD3D_DATA_COUNT, + }; + + static inline bool data_type_is_integer(enum vkd3d_data_type data_type) + { +- return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT +- || data_type == VKD3D_DATA_UINT64; ++ return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT16 ++ || data_type == VKD3D_DATA_UINT || data_type == VKD3D_DATA_UINT64; + } + + static inline bool data_type_is_bool(enum vkd3d_data_type data_type) +@@ -1028,7 +1031,7 @@ struct signature_element *vsir_signature_find_element_for_reg(const struct shade + unsigned int reg_idx, unsigned int write_mask); + void shader_signature_cleanup(struct shader_signature *signature); + +-struct vkd3d_shader_desc ++struct dxbc_shader_desc + { + const uint32_t *byte_code; + size_t byte_code_size; +@@ -1036,7 +1039,10 @@ struct vkd3d_shader_desc + struct shader_signature input_signature; + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; ++}; + ++struct vkd3d_shader_desc ++{ + struct + { + uint32_t used, external; +@@ -1293,6 +1299,10 @@ struct vsir_program + struct vkd3d_shader_version shader_version; + struct vkd3d_shader_instruction_array instructions; + ++ struct shader_signature input_signature; ++ struct shader_signature output_signature; ++ struct shader_signature patch_constant_signature; ++ + unsigned int input_control_point_count, output_control_point_count; + unsigned int block_count; + unsigned int temp_count; +@@ -1394,8 +1404,8 @@ enum vsir_asm_flags + VSIR_ASM_FLAG_DUMP_TYPES = 0x1, + }; + +-enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, +- const 
struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, ++enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, ++ const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, enum vsir_asm_flags flags); + void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); + struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); +@@ -1487,20 +1497,15 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); + +-void free_shader_desc(struct vkd3d_shader_desc *desc); ++void free_dxbc_shader_desc(struct dxbc_shader_desc *desc); + + int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, +- struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc); ++ struct vkd3d_shader_message_context *message_context, const char *source_name, struct dxbc_shader_desc *desc); + int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); + +-struct vkd3d_glsl_generator; +- +-struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, +- struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); +-int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, +- struct vsir_program *program, struct vkd3d_shader_code *out); +-void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); ++int glsl_compile(struct vsir_program *program, struct vkd3d_shader_code *out, ++ struct vkd3d_shader_message_context *message_context); + + #define SPIRV_MAX_SRC_COUNT 6 + +@@ -1524,10 +1529,12 @@ static 
inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_ty + { + switch (data_type) + { ++ case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ + case VKD3D_DATA_FLOAT: + case VKD3D_DATA_UNORM: + case VKD3D_DATA_SNORM: + return VKD3D_SHADER_COMPONENT_FLOAT; ++ case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ + case VKD3D_DATA_UINT: + return VKD3D_SHADER_COMPONENT_UINT; + case VKD3D_DATA_INT: +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index da9c1d964d4..7841a811bf7 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -94,6 +94,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = + VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), + VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), + VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), ++ VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), + VK_EXTENSION(EXT_MUTABLE_DESCRIPTOR_TYPE, EXT_mutable_descriptor_type), + VK_EXTENSION(EXT_ROBUSTNESS_2, EXT_robustness2), + VK_EXTENSION(EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, EXT_shader_demote_to_helper_invocation), +@@ -789,6 +790,7 @@ struct vkd3d_physical_device_info + VkPhysicalDeviceConditionalRenderingFeaturesEXT conditional_rendering_features; + VkPhysicalDeviceDepthClipEnableFeaturesEXT depth_clip_features; + VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features; ++ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_features; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2_features; + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote_features; + VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features; +@@ -808,6 +810,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; + 
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; + VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; ++ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; + VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; + VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; + VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; +@@ -825,6 +828,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + conditional_rendering_features = &info->conditional_rendering_features; + depth_clip_features = &info->depth_clip_features; + descriptor_indexing_features = &info->descriptor_indexing_features; ++ fragment_shader_interlock_features = &info->fragment_shader_interlock_features; + robustness2_features = &info->robustness2_features; + descriptor_indexing_properties = &info->descriptor_indexing_properties; + maintenance3_properties = &info->maintenance3_properties; +@@ -846,6 +850,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i + vk_prepend_struct(&info->features2, depth_clip_features); + descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; + vk_prepend_struct(&info->features2, descriptor_indexing_features); ++ fragment_shader_interlock_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, fragment_shader_interlock_features); + robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + vk_prepend_struct(&info->features2, robustness2_features); + demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; +@@ -1158,6 +1164,7 @@ static void vkd3d_trace_physical_device_limits(const struct vkd3d_physical_devic + + static void 
vkd3d_trace_physical_device_features(const struct vkd3d_physical_device_info *info) + { ++ const VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; + const VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; + const VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; + const VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; +@@ -1279,6 +1286,15 @@ static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_dev + TRACE(" VkPhysicalDeviceDepthClipEnableFeaturesEXT:\n"); + TRACE(" depthClipEnable: %#x.\n", depth_clip_features->depthClipEnable); + ++ fragment_shader_interlock_features = &info->fragment_shader_interlock_features; ++ TRACE(" VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT:\n"); ++ TRACE(" fragmentShaderSampleInterlock: %#x.\n", ++ fragment_shader_interlock_features->fragmentShaderSampleInterlock); ++ TRACE(" fragmentShaderPixelInterlock: %#x.\n", ++ fragment_shader_interlock_features->fragmentShaderPixelInterlock); ++ TRACE(" fragmentShaderShadingRateInterlock: %#x.\n", ++ fragment_shader_interlock_features->fragmentShaderShadingRateInterlock); ++ + demote_features = &info->demote_features; + TRACE(" VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT:\n"); + TRACE(" shaderDemoteToHelperInvocation: %#x.\n", demote_features->shaderDemoteToHelperInvocation); +@@ -1476,6 +1492,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + uint32_t *device_extension_count, bool **user_extension_supported) + { + const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; ++ VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock; + const struct vkd3d_optional_device_extensions_info *optional_extensions; + VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing; + VkPhysicalDevice physical_device = device->vk_physical_device; +@@ -1539,8 +1556,6 @@
static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + + device->feature_options.TypedUAVLoadAdditionalFormats = features->shaderStorageImageReadWithoutFormat + && d3d12_device_supports_typed_uav_load_additional_formats(device); +- /* GL_INTEL_fragment_shader_ordering, no Vulkan equivalent. */ +- device->feature_options.ROVsSupported = FALSE; + /* GL_INTEL_conservative_rasterization, no Vulkan equivalent. */ + device->feature_options.ConservativeRasterizationTier = D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED; + device->feature_options.MaxGPUVirtualAddressBitsPerResource = 40; /* FIXME */ +@@ -1619,6 +1634,12 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + *user_extension_supported, vulkan_info, "device", + device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); + ++ fragment_shader_interlock = &physical_device_info->fragment_shader_interlock_features; ++ if (!fragment_shader_interlock->fragmentShaderSampleInterlock ++ || !fragment_shader_interlock->fragmentShaderPixelInterlock) ++ vulkan_info->EXT_fragment_shader_interlock = false; ++ device->feature_options.ROVsSupported = vulkan_info->EXT_fragment_shader_interlock; ++ + if (!physical_device_info->conditional_rendering_features.conditionalRendering) + vulkan_info->EXT_conditional_rendering = false; + if (!physical_device_info->depth_clip_features.depthClipEnable) +@@ -1675,6 +1696,10 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] + = VKD3D_SHADER_SPIRV_EXTENSION_EXT_DESCRIPTOR_INDEXING; + ++ if (vulkan_info->EXT_fragment_shader_interlock) ++ vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] ++ = VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK; ++ + if (vulkan_info->EXT_shader_stencil_export) + vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] + = VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT; +diff --git 
a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index 1f47ddd5023..34a98c4fc3d 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -55,7 +55,7 @@ + + #define VKD3D_MAX_COMPATIBLE_FORMAT_COUNT 6u + #define VKD3D_MAX_QUEUE_FAMILY_COUNT 3u +-#define VKD3D_MAX_SHADER_EXTENSIONS 4u ++#define VKD3D_MAX_SHADER_EXTENSIONS 5u + #define VKD3D_MAX_SHADER_STAGES 5u + #define VKD3D_MAX_VK_SYNC_OBJECTS 4u + #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u +@@ -133,6 +133,7 @@ struct vkd3d_vulkan_info + bool EXT_debug_marker; + bool EXT_depth_clip_enable; + bool EXT_descriptor_indexing; ++ bool EXT_fragment_shader_interlock; + bool EXT_mutable_descriptor_type; + bool EXT_robustness2; + bool EXT_shader_demote_to_helper_invocation; +-- +2.43.0 + diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-374c5fcbdd91b6b7e6c362c79871ddf30f0.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-374c5fcbdd91b6b7e6c362c79871ddf30f0.patch new file mode 100644 index 00000000..614a5f58 --- /dev/null +++ b/patches/vkd3d-latest/0004-Updated-vkd3d-to-374c5fcbdd91b6b7e6c362c79871ddf30f0.patch @@ -0,0 +1,581 @@ +From cbdcf283637d0b06fe2da0aa61dc56510bad09eb Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Thu, 14 Mar 2024 20:16:07 +1100 +Subject: [PATCH] Updated vkd3d to 374c5fcbdd91b6b7e6c362c79871ddf30f0ccee2. 
+ +--- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 20 +-- + libs/vkd3d/libs/vkd3d-shader/ir.c | 178 +++++++++++++++------------ + libs/vkd3d/libs/vkd3d-shader/spirv.c | 19 +++ + 3 files changed, 125 insertions(+), 92 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index 7c630d181ef..da90782c814 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -20,7 +20,7 @@ + + struct vkd3d_glsl_generator + { +- struct vkd3d_shader_version version; ++ struct vsir_program *program; + struct vkd3d_string_buffer buffer; + struct vkd3d_shader_location location; + struct vkd3d_shader_message_context *message_context; +@@ -42,7 +42,7 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( + static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, + const struct vkd3d_shader_instruction *ins) + { +- const struct vkd3d_shader_version *version = &generator->version; ++ const struct vkd3d_shader_version *version = &generator->program->shader_version; + + /* + * TODO: Implement in_subroutine +@@ -76,9 +76,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator + } + } + +-static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, +- struct vsir_program *program, struct vkd3d_shader_code *out) ++static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, struct vkd3d_shader_code *out) + { ++ const struct vkd3d_shader_instruction_array *instructions = &generator->program->instructions; + unsigned int i; + void *code; + +@@ -87,9 +87,9 @@ static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, + vkd3d_string_buffer_printf(&generator->buffer, "#version 440\n\n"); + vkd3d_string_buffer_printf(&generator->buffer, "void main()\n{\n"); + +- for (i = 0; i < program->instructions.count; ++i) ++ for (i = 0; i < instructions->count; ++i) + { +- vkd3d_glsl_handle_instruction(generator, 
&program->instructions.elements[i]); ++ vkd3d_glsl_handle_instruction(generator, &instructions->elements[i]); + } + + if (generator->failed) +@@ -114,10 +114,10 @@ static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) + } + + static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, +- const struct vkd3d_shader_version *version, struct vkd3d_shader_message_context *message_context) ++ struct vsir_program *program, struct vkd3d_shader_message_context *message_context) + { + memset(gen, 0, sizeof(*gen)); +- gen->version = *version; ++ gen->program = program; + vkd3d_string_buffer_init(&gen->buffer); + gen->message_context = message_context; + } +@@ -128,8 +128,8 @@ int glsl_compile(struct vsir_program *program, struct vkd3d_shader_code *out, + struct vkd3d_glsl_generator generator; + int ret; + +- vkd3d_glsl_generator_init(&generator, &program->shader_version, message_context); +- ret = vkd3d_glsl_generator_generate(&generator, program, out); ++ vkd3d_glsl_generator_init(&generator, program, message_context); ++ ret = vkd3d_glsl_generator_generate(&generator, out); + vkd3d_glsl_generator_cleanup(&generator); + + return ret; +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 759c89957d6..0dd31af9192 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -94,9 +94,8 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, + return true; + } + +-static enum vkd3d_result instruction_array_lower_texkills(struct vkd3d_shader_parser *parser) ++static enum vkd3d_result vsir_program_lower_texkills(struct vsir_program *program) + { +- struct vsir_program *program = &parser->program; + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_instruction *texkill_ins, *ins; + unsigned int components_read = 3 + (program->shader_version.major >= 2); +@@ -230,10 +229,11 @@ static const struct 
vkd3d_shader_varying_map *find_varying_map( + return NULL; + } + +-static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, +- const struct vkd3d_shader_compile_info *compile_info) ++static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) + { +- struct shader_signature *signature = &parser->program.output_signature; ++ const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; ++ struct shader_signature *signature = &program->output_signature; + const struct vkd3d_shader_varying_map_info *varying_map; + unsigned int i; + +@@ -255,7 +255,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars + * location with a different mask. */ + if (input_mask && input_mask != e->mask) + { +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " + "Output mask %#x does not match input mask %#x.", + e->mask, input_mask); +@@ -272,7 +272,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars + { + if (varying_map->varying_map[i].output_signature_index >= signature->element_count) + { +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " + "The next stage consumes varyings not written by this stage."); + return VKD3D_ERROR_NOT_IMPLEMENTED; +@@ -2666,33 +2666,36 @@ fail: + return VKD3D_ERROR_OUT_OF_MEMORY; + } + +-static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src); ++static void 
materialize_ssas_to_temps_process_src_param(struct vsir_program *program, ++ struct vkd3d_shader_src_param *src); + + /* This is idempotent: it can be safely applied more than once on the + * same register. */ +-static void materialize_ssas_to_temps_process_reg(struct vkd3d_shader_parser *parser, struct vkd3d_shader_register *reg) ++static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct vkd3d_shader_register *reg) + { + unsigned int i; + + if (reg->type == VKD3DSPR_SSA) + { + reg->type = VKD3DSPR_TEMP; +- reg->idx[0].offset += parser->program.temp_count; ++ reg->idx[0].offset += program->temp_count; + } + + for (i = 0; i < reg->idx_count; ++i) + if (reg->idx[i].rel_addr) +- materialize_ssas_to_temps_process_src_param(parser, reg->idx[i].rel_addr); ++ materialize_ssas_to_temps_process_src_param(program, reg->idx[i].rel_addr); + } + +-static void materialize_ssas_to_temps_process_dst_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_dst_param *dst) ++static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program, ++ struct vkd3d_shader_dst_param *dst) + { +- materialize_ssas_to_temps_process_reg(parser, &dst->reg); ++ materialize_ssas_to_temps_process_reg(program, &dst->reg); + } + +-static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src) ++static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, ++ struct vkd3d_shader_src_param *src) + { +- materialize_ssas_to_temps_process_reg(parser, &src->reg); ++ materialize_ssas_to_temps_process_reg(program, &src->reg); + } + + static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins, +@@ -2711,7 +2714,7 @@ static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_so + vkd3d_unreachable(); + } + +-static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser 
*parser, ++static bool materialize_ssas_to_temps_synthesize_mov(struct vsir_program *program, + struct vkd3d_shader_instruction *instruction, const struct vkd3d_shader_location *loc, + const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond, + const struct vkd3d_shader_src_param *source, bool invert) +@@ -2719,7 +2722,7 @@ static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser + struct vkd3d_shader_src_param *src; + struct vkd3d_shader_dst_param *dst; + +- if (!vsir_instruction_init_with_params(&parser->program, instruction, loc, ++ if (!vsir_instruction_init_with_params(program, instruction, loc, + cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ? 3 : 1)) + return false; + +@@ -2727,7 +2730,7 @@ static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser + src = instruction->src; + + dst[0] = *dest; +- materialize_ssas_to_temps_process_dst_param(parser, &dst[0]); ++ materialize_ssas_to_temps_process_dst_param(program, &dst[0]); + + assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0); + assert(dst[0].modifiers == 0); +@@ -2739,19 +2742,19 @@ static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser + src[1 + invert] = *source; + memset(&src[2 - invert], 0, sizeof(src[2 - invert])); + src[2 - invert].reg = dst[0].reg; +- materialize_ssas_to_temps_process_src_param(parser, &src[1]); +- materialize_ssas_to_temps_process_src_param(parser, &src[2]); ++ materialize_ssas_to_temps_process_src_param(program, &src[1]); ++ materialize_ssas_to_temps_process_src_param(program, &src[2]); + } + else + { + src[0] = *source; +- materialize_ssas_to_temps_process_src_param(parser, &src[0]); ++ materialize_ssas_to_temps_process_src_param(program, &src[0]); + } + + return true; + } + +-static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *parser) ++static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program) + { + struct 
vkd3d_shader_instruction *instructions = NULL; + struct materialize_ssas_to_temps_block_data +@@ -2762,18 +2765,18 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p + size_t ins_capacity = 0, ins_count = 0, i; + unsigned int current_label = 0; + +- if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count)) ++ if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) + goto fail; + +- if (!(block_index = vkd3d_calloc(parser->program.block_count, sizeof(*block_index)))) ++ if (!(block_index = vkd3d_calloc(program->block_count, sizeof(*block_index)))) + { + ERR("Failed to allocate block index.\n"); + goto fail; + } + +- for (i = 0; i < parser->program.instructions.count; ++i) ++ for (i = 0; i < program->instructions.count; ++i) + { +- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + switch (ins->handler_idx) + { +@@ -2795,16 +2798,16 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p + } + } + +- for (i = 0; i < parser->program.instructions.count; ++i) ++ for (i = 0; i < program->instructions.count; ++i) + { +- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + size_t j; + + for (j = 0; j < ins->dst_count; ++j) +- materialize_ssas_to_temps_process_dst_param(parser, &ins->dst[j]); ++ materialize_ssas_to_temps_process_dst_param(program, &ins->dst[j]); + + for (j = 0; j < ins->src_count; ++j) +- materialize_ssas_to_temps_process_src_param(parser, &ins->src[j]); ++ materialize_ssas_to_temps_process_src_param(program, &ins->src[j]); + + switch (ins->handler_idx) + { +@@ -2825,9 +2828,10 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p + { + const struct vkd3d_shader_src_param *source; + +- 
source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); +- if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, +- &parser->program.instructions.elements[j].dst[0], NULL, source, false)) ++ source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], ++ current_label); ++ if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], ++ &ins->location, &program->instructions.elements[j].dst[0], NULL, source, false)) + goto fail; + + ++ins_count; +@@ -2847,9 +2851,10 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p + { + const struct vkd3d_shader_src_param *source; + +- source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); +- if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, +- &parser->program.instructions.elements[j].dst[0], cond, source, false)) ++ source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], ++ current_label); ++ if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], ++ &ins->location, &program->instructions.elements[j].dst[0], cond, source, false)) + goto fail; + + ++ins_count; +@@ -2859,9 +2864,10 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p + { + const struct vkd3d_shader_src_param *source; + +- source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); +- if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, +- &parser->program.instructions.elements[j].dst[0], cond, source, true)) ++ source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], ++ current_label); ++ if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], ++ &ins->location, 
&program->instructions.elements[j].dst[0], cond, source, true)) + goto fail; + + ++ins_count; +@@ -2883,13 +2889,13 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p + instructions[ins_count++] = *ins; + } + +- vkd3d_free(parser->program.instructions.elements); ++ vkd3d_free(program->instructions.elements); + vkd3d_free(block_index); +- parser->program.instructions.elements = instructions; +- parser->program.instructions.capacity = ins_capacity; +- parser->program.instructions.count = ins_count; +- parser->program.temp_count += parser->program.ssa_count; +- parser->program.ssa_count = 0; ++ program->instructions.elements = instructions; ++ program->instructions.capacity = ins_capacity; ++ program->instructions.count = ins_count; ++ program->temp_count += program->ssa_count; ++ program->ssa_count = 0; + + return VKD3D_OK; + +@@ -2900,20 +2906,20 @@ fail: + return VKD3D_ERROR_OUT_OF_MEMORY; + } + +-static enum vkd3d_result simple_structurizer_run(struct vkd3d_shader_parser *parser) ++static enum vkd3d_result vsir_program_structurise(struct vsir_program *program) + { +- const unsigned int block_temp_idx = parser->program.temp_count; ++ const unsigned int block_temp_idx = program->temp_count; + struct vkd3d_shader_instruction *instructions = NULL; + const struct vkd3d_shader_location no_loc = {0}; + size_t ins_capacity = 0, ins_count = 0, i; + bool first_label_found = false; + +- if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count)) ++ if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) + goto fail; + +- for (i = 0; i < parser->program.instructions.count; ++i) ++ for (i = 0; i < program->instructions.count; ++i) + { +- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + switch (ins->handler_idx) + { +@@ -2929,23 +2935,27 @@ static enum vkd3d_result 
simple_structurizer_run(struct vkd3d_shader_parser *par + { + first_label_found = true; + +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) ++ if (!vsir_instruction_init_with_params(program, ++ &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) + goto fail; + dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); + src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); + ins_count++; + +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_LOOP, 0, 0)) ++ if (!vsir_instruction_init_with_params(program, ++ &instructions[ins_count], &no_loc, VKD3DSIH_LOOP, 0, 0)) + goto fail; + ins_count++; + +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_SWITCH, 0, 1)) ++ if (!vsir_instruction_init_with_params(program, ++ &instructions[ins_count], &no_loc, VKD3DSIH_SWITCH, 0, 1)) + goto fail; + src_param_init_temp_uint(&instructions[ins_count].src[0], block_temp_idx); + ins_count++; + } + +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_CASE, 0, 1)) ++ if (!vsir_instruction_init_with_params(program, ++ &instructions[ins_count], &no_loc, VKD3DSIH_CASE, 0, 1)) + goto fail; + src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); + ins_count++; +@@ -2957,7 +2967,8 @@ static enum vkd3d_result simple_structurizer_run(struct vkd3d_shader_parser *par + + if (vsir_register_is_label(&ins->src[0].reg)) + { +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) ++ if (!vsir_instruction_init_with_params(program, ++ &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) + goto fail; + dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); + 
src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); +@@ -2965,7 +2976,8 @@ static enum vkd3d_result simple_structurizer_run(struct vkd3d_shader_parser *par + } + else + { +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOVC, 1, 3)) ++ if (!vsir_instruction_init_with_params(program, ++ &instructions[ins_count], &no_loc, VKD3DSIH_MOVC, 1, 3)) + goto fail; + dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); + instructions[ins_count].src[0] = ins->src[0]; +@@ -2974,7 +2986,8 @@ static enum vkd3d_result simple_structurizer_run(struct vkd3d_shader_parser *par + ins_count++; + } + +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_BREAK, 0, 0)) ++ if (!vsir_instruction_init_with_params(program, ++ &instructions[ins_count], &no_loc, VKD3DSIH_BREAK, 0, 0)) + goto fail; + ins_count++; + break; +@@ -2994,23 +3007,23 @@ static enum vkd3d_result simple_structurizer_run(struct vkd3d_shader_parser *par + if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3)) + goto fail; + +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDSWITCH, 0, 0)) ++ if (!vsir_instruction_init_with_params(program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDSWITCH, 0, 0)) + goto fail; + ins_count++; + +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDLOOP, 0, 0)) ++ if (!vsir_instruction_init_with_params(program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDLOOP, 0, 0)) + goto fail; + ins_count++; + +- if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_RET, 0, 0)) ++ if (!vsir_instruction_init_with_params(program, &instructions[ins_count], &no_loc, VKD3DSIH_RET, 0, 0)) + goto fail; + ins_count++; + +- 
vkd3d_free(parser->program.instructions.elements); +- parser->program.instructions.elements = instructions; +- parser->program.instructions.capacity = ins_capacity; +- parser->program.instructions.count = ins_count; +- parser->program.temp_count += 1; ++ vkd3d_free(program->instructions.elements); ++ program->instructions.elements = instructions; ++ program->instructions.capacity = ins_capacity; ++ program->instructions.count = ins_count; ++ program->temp_count += 1; + + return VKD3D_OK; + +@@ -3906,25 +3919,26 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info) + { +- struct vkd3d_shader_instruction_array *instructions = &parser->program.instructions; ++ struct vkd3d_shader_message_context *message_context = parser->message_context; ++ struct vsir_program *program = &parser->program; + enum vkd3d_result result = VKD3D_OK; + +- remove_dcl_temps(&parser->program); ++ remove_dcl_temps(program); + +- if ((result = instruction_array_lower_texkills(parser)) < 0) ++ if ((result = vsir_program_lower_texkills(program)) < 0) + return result; + +- if (parser->program.shader_version.major >= 6) ++ if (program->shader_version.major >= 6) + { + struct vsir_cfg cfg; + +- if ((result = lower_switch_to_if_ladder(&parser->program)) < 0) ++ if ((result = lower_switch_to_if_ladder(program)) < 0) + return result; + +- if ((result = materialize_ssas_to_temps(parser)) < 0) ++ if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0) + return result; + +- if ((result = vsir_cfg_init(&cfg, &parser->program, parser->message_context)) < 0) ++ if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0) + return result; + + vsir_cfg_compute_dominators(&cfg); +@@ -3947,7 +3961,7 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + return result; + } + +- if ((result = 
simple_structurizer_run(parser)) < 0) ++ if ((result = vsir_program_structurise(program)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; +@@ -3957,29 +3971,29 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + } + else + { +- if (parser->program.shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + { +- if ((result = remap_output_signature(parser, compile_info)) < 0) ++ if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0) + return result; + } + +- if (parser->program.shader_version.type == VKD3D_SHADER_TYPE_HULL) ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) + { +- if ((result = instruction_array_flatten_hull_shader_phases(instructions)) < 0) ++ if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) + return result; + +- if ((result = instruction_array_normalise_hull_shader_control_point_io(instructions, +- &parser->program.input_signature)) < 0) ++ if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, ++ &program->input_signature)) < 0) + return result; + } + + if ((result = shader_normalise_io_registers(parser)) < 0) + return result; + +- if ((result = instruction_array_normalise_flat_constants(&parser->program)) < 0) ++ if ((result = instruction_array_normalise_flat_constants(program)) < 0) + return result; + +- remove_dead_code(&parser->program); ++ remove_dead_code(program); + + if ((result = normalise_combined_samplers(parser)) < 0) + return result; +@@ -3989,7 +4003,7 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + return result; + + if (TRACE_ON()) +- vkd3d_shader_trace(&parser->program); ++ vkd3d_shader_trace(program); + + if (!parser->failed && (result = vsir_validate(parser)) < 0) + return result; +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 
0568407f997..1518afa93be 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -7020,6 +7020,15 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil + SpvOp op = SpvOpMax; + unsigned int i; + ++ if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) ++ { ++ /* At least some drivers support this anyway, but if validation is enabled it will fail. */ ++ FIXME("Unsupported 64-bit source for bit count.\n"); ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, ++ "64-bit source for bit count is not supported."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ + if (src->reg.data_type == VKD3D_DATA_BOOL) + { + if (dst->reg.data_type == VKD3D_DATA_BOOL) +@@ -7160,6 +7169,16 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp + unsigned int i, component_count; + enum GLSLstd450 glsl_inst; + ++ if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI ++ || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) ++ { ++ /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ ++ FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, ++ "64-bit source for handler %#x is not supported.", instruction->handler_idx); ++ return; ++ } ++ + glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); + if (glsl_inst == GLSLstd450Bad) + { +-- +2.43.0 + diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-abf76372e0a44dd3920e3fb796d75e920d0.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-abf76372e0a44dd3920e3fb796d75e920d0.patch new file mode 100644 index 00000000..5f1d712c --- /dev/null +++ b/patches/vkd3d-latest/0005-Updated-vkd3d-to-abf76372e0a44dd3920e3fb796d75e920d0.patch @@ -0,0 +1,1803 @@ +From cfe8e416936bc48f9a4a02a2f13226b81fc98423 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Fri, 15 Mar 2024 16:37:26 +1100 +Subject: [PATCH] Updated vkd3d to abf76372e0a44dd3920e3fb796d75e920d0c07bb. + +--- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 8 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 229 +++++++- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 18 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 10 + + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 3 + + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 309 +++++++++-- + libs/vkd3d/libs/vkd3d-shader/ir.c | 494 ++++++++++++++++-- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 20 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 4 +- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 12 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 17 +- + 11 files changed, 989 insertions(+), 135 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 57dd0258aef..9f153a1da04 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1358,7 +1358,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi + sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); + + if 
(!sm1->p.failed) +- ret = vsir_validate(&sm1->p); ++ ret = vkd3d_shader_parser_validate(&sm1->p); + + if (sm1->p.failed && ret >= 0) + ret = VKD3D_ERROR_INVALID_SHADER; +@@ -2334,10 +2334,14 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + break; + + case HLSL_OP2_SLT: ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); + write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP3_CMP: ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) ++ hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); + write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + +@@ -2496,7 +2500,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + + if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) + { +- FIXME("Matrix writemasks need to be lowered.\n"); ++ hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks."); + return; + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index de51588b513..86671c07d70 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -393,6 +393,7 @@ enum dx_intrinsic_opcode + DX_TEXTURE_GATHER_CMP = 74, + DX_ATOMIC_BINOP = 78, + DX_ATOMIC_CMP_XCHG = 79, ++ DX_BARRIER = 80, + DX_DERIV_COARSEX = 83, + DX_DERIV_COARSEY = 84, + DX_DERIV_FINEX = 85, +@@ -465,6 +466,14 @@ enum dxil_atomic_binop_code + ATOMIC_BINOP_INVALID, + }; + ++enum dxil_sync_flags ++{ ++ SYNC_THREAD_GROUP = 0x1, ++ SYNC_GLOBAL_UAV = 0x2, ++ SYNC_THREAD_GROUP_UAV = 0x4, ++ SYNC_GROUP_SHARED_MEMORY = 0x8, ++}; ++ + struct sm6_pointer_info + { + const struct sm6_type *type; +@@ -543,6 +552,7 @@ struct sm6_value + { + const struct sm6_type *type; + enum sm6_value_type value_type; ++ unsigned int structure_stride; + bool is_undefined; +
union + { +@@ -755,6 +765,7 @@ struct sm6_parser + + unsigned int indexable_temp_count; + unsigned int icb_count; ++ unsigned int tgsm_count; + + struct sm6_value *values; + size_t value_count; +@@ -2267,6 +2278,12 @@ static void register_init_ssa_scalar(struct vkd3d_shader_register *reg, const st + register_init_ssa_vector(reg, sm6_type_get_scalar_type(type, 0), 1, value, sm6); + } + ++static void register_make_constant_uint(struct vkd3d_shader_register *reg, unsigned int value) ++{ ++ vsir_register_init(reg, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); ++ reg->u.immconst_u32[0] = value; ++} ++ + static void dst_param_init(struct vkd3d_shader_dst_param *param) + { + param->write_mask = VKD3DSP_WRITEMASK_0; +@@ -2330,6 +2347,12 @@ static void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param, + param->reg = *reg; + } + ++static void src_param_make_constant_uint(struct vkd3d_shader_src_param *param, unsigned int value) ++{ ++ src_param_init(param); ++ register_make_constant_uint(¶m->reg, value); ++} ++ + static void register_index_address_init(struct vkd3d_shader_register_index *idx, const struct sm6_value *address, + struct sm6_parser *sm6) + { +@@ -3009,6 +3032,58 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru + register_init_with_id(&dst->u.reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx); + } + ++static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6_type *elem_type, ++ unsigned int alignment, unsigned int init, struct sm6_value *dst) ++{ ++ enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); ++ struct vkd3d_shader_instruction *ins; ++ unsigned int byte_count; ++ ++ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_RAW); ++ dst_param_init(&ins->declaration.tgsm_raw.reg); ++ register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++); ++ dst->u.reg = 
ins->declaration.tgsm_raw.reg.reg; ++ dst->structure_stride = 0; ++ ins->declaration.tgsm_raw.alignment = alignment; ++ byte_count = elem_type->u.width / 8u; ++ if (byte_count != 4) ++ { ++ FIXME("Unsupported byte count %u.\n", byte_count); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Raw TGSM byte count %u is not supported.", byte_count); ++ } ++ ins->declaration.tgsm_raw.byte_count = byte_count; ++ /* The initialiser value index will be resolved later when forward references can be handled. */ ++ ins->flags = init; ++} ++ ++static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const struct sm6_type *elem_type, ++ unsigned int count, unsigned int alignment, unsigned int init, struct sm6_value *dst) ++{ ++ enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); ++ struct vkd3d_shader_instruction *ins; ++ unsigned int structure_stride; ++ ++ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_STRUCTURED); ++ dst_param_init(&ins->declaration.tgsm_structured.reg); ++ register_init_with_id(&ins->declaration.tgsm_structured.reg.reg, VKD3DSPR_GROUPSHAREDMEM, ++ data_type, sm6->tgsm_count++); ++ dst->u.reg = ins->declaration.tgsm_structured.reg.reg; ++ structure_stride = elem_type->u.width / 8u; ++ if (structure_stride != 4) ++ { ++ FIXME("Unsupported structure stride %u.\n", structure_stride); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Structured TGSM byte stride %u is not supported.", structure_stride); ++ } ++ dst->structure_stride = structure_stride; ++ ins->declaration.tgsm_structured.alignment = alignment; ++ ins->declaration.tgsm_structured.byte_stride = structure_stride; ++ ins->declaration.tgsm_structured.structure_count = count; ++ /* The initialiser value index will be resolved later when forward references can be handled. 
*/ ++ ins->flags = init; ++} ++ + static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_record *record) + { + const struct sm6_type *type, *scalar_type; +@@ -3134,10 +3209,17 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ + } + else if (address_space == ADDRESS_SPACE_GROUPSHARED) + { +- FIXME("Unsupported TGSM.\n"); +- vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, +- "TGSM global variables are not supported."); +- return false; ++ if (!sm6_type_is_numeric(scalar_type)) ++ { ++ WARN("Unsupported type class %u.\n", scalar_type->class); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "TGSM variables of type class %u are not supported.", scalar_type->class); ++ return false; ++ } ++ if (count == 1) ++ sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst); ++ else ++ sm6_parser_declare_tgsm_structured(sm6, scalar_type, count, alignment, init, dst); + } + else + { +@@ -3173,6 +3255,38 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init + return NULL; + } + ++static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm6) ++{ ++ const struct sm6_value *value; ++ ++ if (!index) ++ return false; ++ ++ --index; ++ if (!(value = sm6_parser_get_value_safe(sm6, index)) ++ || (!sm6_value_is_icb(value) && !sm6_value_is_constant(value) && !sm6_value_is_undef(value))) ++ { ++ WARN("Invalid initialiser index %zu.\n", index); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "TGSM initialiser value index %zu is invalid.", index); ++ return false; ++ } ++ else if ((sm6_value_is_icb(value) && value->u.icb->is_null) || sm6_value_is_constant_zero(value)) ++ { ++ return true; ++ } ++ else if (sm6_value_is_undef(value)) ++ { ++ /* In VSIR, initialisation with undefined values of objects is implied, not explicit. 
*/ ++ return false; ++ } ++ ++ FIXME("Non-zero initialisers are not supported.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, ++ "Non-zero TGSM initialisers are not supported."); ++ return false; ++} ++ + static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) + { + size_t i, count, base_value_idx = sm6->value_count; +@@ -3246,6 +3360,16 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) + { + ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); + } ++ else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) ++ { ++ ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); ++ ins->flags = 0; ++ } ++ else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) ++ { ++ ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); ++ ins->flags = 0; ++ } + } + for (i = base_value_idx; i < sm6->value_count; ++i) + { +@@ -3989,6 +4113,27 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr + dst->u.reg = dst_params[0].reg; + } + ++static void sm6_parser_emit_dx_barrier(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ enum dxil_sync_flags flags; ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SYNC); ++ flags = sm6_value_get_constant_uint(operands[0]); ++ ins->flags = flags & (SYNC_THREAD_GROUP | SYNC_THREAD_GROUP_UAV); ++ if (flags & SYNC_GLOBAL_UAV) ++ ins->flags |= VKD3DSSF_GLOBAL_UAV; ++ if (flags & SYNC_GROUP_SHARED_MEMORY) ++ ins->flags |= VKD3DSSF_GROUP_SHARED_MEMORY; ++ if (flags &= ~(SYNC_THREAD_GROUP | SYNC_GLOBAL_UAV | SYNC_THREAD_GROUP_UAV | SYNC_GROUP_SHARED_MEMORY)) ++ { ++ FIXME("Unhandled flags %#x.\n", flags); ++ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, ++ 
"Barrier flags %#x are unhandled.", flags); ++ } ++} ++ + static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -4818,6 +4963,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_ATAN ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_ATOMIC_BINOP ] = {"o", "HciiiR", sm6_parser_emit_dx_atomic_binop}, + [DX_ATOMIC_CMP_XCHG ] = {"o", "HiiiRR", sm6_parser_emit_dx_atomic_binop}, ++ [DX_BARRIER ] = {"v", "c", sm6_parser_emit_dx_barrier}, + [DX_BFREV ] = {"m", "R", sm6_parser_emit_dx_unary}, + [DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load}, + [DX_BUFFER_STORE ] = {"v", "Hiiooooc", sm6_parser_emit_dx_buffer_store}, +@@ -5541,6 +5687,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record + register_index_address_init(®->idx[1], elem_value, sm6); + reg->idx[1].is_in_bounds = is_in_bounds; + reg->idx_count = 2; ++ dst->structure_stride = src->structure_stride; + + ins->handler_idx = VKD3DSIH_NOP; + } +@@ -5549,8 +5696,8 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor + struct vkd3d_shader_instruction *ins, struct sm6_value *dst) + { + const struct sm6_type *elem_type = NULL, *pointee_type; +- struct vkd3d_shader_src_param *src_param; +- unsigned int alignment, i = 0; ++ unsigned int alignment, operand_count, i = 0; ++ struct vkd3d_shader_src_param *src_params; + const struct sm6_value *ptr; + uint64_t alignment_code; + +@@ -5587,12 +5734,34 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor + if (record->operands[i]) + WARN("Ignoring volatile modifier.\n"); + +- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); ++ if (ptr->structure_stride) ++ { ++ assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); + +- if (!(src_param = 
instruction_src_params_alloc(ins, 1, sm6))) +- return; +- src_param_init_from_value(&src_param[0], ptr); +- src_param->reg.alignment = alignment; ++ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) ++ return; ++ if (ptr->u.reg.idx[1].rel_addr) ++ src_params[0] = *ptr->u.reg.idx[1].rel_addr; ++ else ++ src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); ++ /* Struct offset is always zero as there is no struct, just an array. */ ++ src_param_make_constant_uint(&src_params[1], 0); ++ src_param_init_from_value(&src_params[2], ptr); ++ src_params[2].reg.alignment = alignment; ++ } ++ else ++ { ++ operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); ++ vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_LD_RAW : VKD3DSIH_MOV); ++ ++ if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) ++ return; ++ if (operand_count > 1) ++ src_param_make_constant_uint(&src_params[0], 0); ++ src_param_init_from_value(&src_params[operand_count - 1], ptr); ++ src_params[operand_count - 1].reg.alignment = alignment; ++ } + + instruction_dst_param_init_ssa_scalar(ins, sm6); + } +@@ -5710,11 +5879,11 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record + static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, + struct vkd3d_shader_instruction *ins, struct sm6_value *dst) + { +- struct vkd3d_shader_src_param *src_param; ++ unsigned int i = 0, alignment, operand_count; ++ struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_dst_param *dst_param; + const struct sm6_type *pointee_type; + const struct sm6_value *ptr, *src; +- unsigned int i = 0, alignment; + uint64_t alignment_code; + + if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) +@@ -5747,16 +5916,40 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco + if (record->operands[i]) + WARN("Ignoring volatile modifier.\n"); + +- 
vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); ++ if (ptr->structure_stride) ++ { ++ assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED); + +- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) +- return; +- src_param_init_from_value(&src_param[0], src); ++ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) ++ return; ++ if (ptr->u.reg.idx[1].rel_addr) ++ src_params[0] = *ptr->u.reg.idx[1].rel_addr; ++ else ++ src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); ++ /* Struct offset is always zero as there is no struct, just an array. */ ++ src_param_make_constant_uint(&src_params[1], 0); ++ src_param_init_from_value(&src_params[2], src); ++ } ++ else ++ { ++ operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); ++ vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV); ++ ++ if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) ++ return; ++ if (operand_count > 1) ++ src_param_make_constant_uint(&src_params[0], 0); ++ src_param_init_from_value(&src_params[operand_count - 1], src); ++ } + + dst_param = instruction_dst_params_alloc(ins, 1, sm6); + dst_param_init(dst_param); + dst_param->reg = ptr->u.reg; + dst_param->reg.alignment = alignment; ++ /* Groupshared stores contain the address in the src params. 
*/ ++ if (dst_param->reg.type != VKD3DSPR_IDXTEMP) ++ dst_param->reg.idx_count = 1; + } + + static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_record *record, +@@ -8481,7 +8674,7 @@ int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compi + vkd3d_free(byte_code); + + if (!sm6->p.failed && ret >= 0) +- ret = vsir_validate(&sm6->p); ++ ret = vkd3d_shader_parser_validate(&sm6->p); + + if (sm6->p.failed && ret >= 0) + ret = VKD3D_ERROR_INVALID_SHADER; +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index da90782c814..23ea89c47be 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -39,6 +39,13 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( + generator->failed = true; + } + ++static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->handler_idx); ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); ++} ++ + static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, + const struct vkd3d_shader_instruction *ins) + { +@@ -69,9 +76,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator + shader_glsl_ret(generator, instruction); + break; + default: +- vkd3d_glsl_compiler_error(generator, +- VKD3D_SHADER_ERROR_GLSL_INTERNAL, +- "Unhandled instruction %#x", instruction->handler_idx); ++ shader_glsl_unhandled(generator, instruction); + break; + } + } +@@ -92,11 +97,14 @@ static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, + vkd3d_glsl_handle_instruction(generator, &instructions->elements[i]); + } + ++ vkd3d_string_buffer_printf(&generator->buffer, "}\n"); ++ ++ if (TRACE_ON()) ++ vkd3d_string_buffer_trace(&generator->buffer); ++ + if (generator->failed) 
+ return VKD3D_ERROR_INVALID_SHADER; + +- vkd3d_string_buffer_printf(&generator->buffer, "}\n"); +- + if ((code = vkd3d_malloc(generator->buffer.buffer_size))) + { + memcpy(code, generator->buffer.buffer, generator->buffer.content_size); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 2820b9abf67..a82334e58fd 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -1348,6 +1348,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp + return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); + } + ++struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, ++ struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; ++ ++ assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); ++ assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); ++ return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); ++} ++ + struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, + struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 681f2edce31..5ced5edc766 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -594,6 +594,7 @@ enum hlsl_ir_expr_op + HLSL_OP2_MUL, + HLSL_OP2_NEQUAL, + HLSL_OP2_RSHIFT, ++ /* SLT(a, b) retrieves 1.0 if (a < b), else 0.0. Only used for SM1-SM3 target vertex shaders. 
*/ + HLSL_OP2_SLT, + + /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, +@@ -1247,6 +1248,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond + struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, + enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, ++ struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); + + void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 9eb65dc0170..ff349ab49ef 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -2902,7 +2902,7 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct + return true; + } + +-/* Use movc/cmp/slt for the ternary operator. */ ++/* Use movc/cmp for the ternary operator. 
*/ + static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement; +@@ -2928,7 +2928,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + return false; + } + +- if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ if (ctx->profile->major_version < 4) + { + struct hlsl_ir_node *abs, *neg; + +@@ -2946,51 +2946,6 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) + return false; + } +- else if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) +- { +- struct hlsl_ir_node *neg, *slt, *sum, *cond2, *slt_cast, *mul; +- +- /* Expression used here is "slt() * (first - second) + second". */ +- +- if (ctx->profile->major_version == 3) +- { +- if (!(cond2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc))) +- return false; +- } +- else +- { +- if (!(cond2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, cond, cond))) +- return false; +- } +- hlsl_block_add_instr(block, cond2); +- +- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cond2, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, neg); +- +- if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, neg, cond2))) +- return false; +- hlsl_block_add_instr(block, slt); +- +- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, second, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, neg); +- +- if (!(sum = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, first, neg))) +- return false; +- hlsl_block_add_instr(block, sum); +- +- if (!(slt_cast = hlsl_new_cast(ctx, slt, sum->data_type, &instr->loc))) +- return false; +- hlsl_block_add_instr(block, slt_cast); +- +- if (!(mul = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, slt_cast, sum))) +- return false; +- 
hlsl_block_add_instr(block, mul); +- +- if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, mul, second))) +- return false; +- } + else + { + if (cond->data_type->base_type == HLSL_TYPE_FLOAT) +@@ -3020,6 +2975,261 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + return true; + } + ++static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, ++ struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *arg1, *arg1_cast, *arg2, *arg2_cast, *slt, *res, *ret; ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; ++ struct hlsl_type *float_type; ++ struct hlsl_ir_expr *expr; ++ bool negate = false; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ if (expr->op != HLSL_OP2_EQUAL && expr->op != HLSL_OP2_NEQUAL && expr->op != HLSL_OP2_LESS ++ && expr->op != HLSL_OP2_GEQUAL) ++ return false; ++ ++ arg1 = expr->operands[0].node; ++ arg2 = expr->operands[1].node; ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); ++ ++ if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, arg1_cast); ++ ++ if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, arg2_cast); ++ ++ switch (expr->op) ++ { ++ case HLSL_OP2_EQUAL: ++ case HLSL_OP2_NEQUAL: ++ { ++ struct hlsl_ir_node *neg, *sub, *abs, *abs_neg; ++ ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, neg); ++ ++ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) ++ return false; ++ hlsl_block_add_instr(block, sub); ++ ++ if (ctx->profile->major_version >= 3) ++ { ++ if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, sub, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, abs); ++ } ++ else ++ { ++ /* Use MUL as a precarious ABS. 
*/ ++ if (!(abs = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, sub, sub))) ++ return false; ++ hlsl_block_add_instr(block, abs); ++ } ++ ++ if (!(abs_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, abs_neg); ++ ++ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, abs_neg, abs))) ++ return false; ++ hlsl_block_add_instr(block, slt); ++ ++ negate = (expr->op == HLSL_OP2_EQUAL); ++ break; ++ } ++ ++ case HLSL_OP2_GEQUAL: ++ case HLSL_OP2_LESS: ++ { ++ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg1_cast, arg2_cast))) ++ return false; ++ hlsl_block_add_instr(block, slt); ++ ++ negate = (expr->op == HLSL_OP2_GEQUAL); ++ break; ++ } ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ if (negate) ++ { ++ struct hlsl_constant_value one_value; ++ struct hlsl_ir_node *one, *slt_neg; ++ ++ one_value.u[0].f = 1.0; ++ one_value.u[1].f = 1.0; ++ one_value.u[2].f = 1.0; ++ one_value.u[3].f = 1.0; ++ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, one); ++ ++ if (!(slt_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, slt_neg); ++ ++ if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, slt_neg))) ++ return false; ++ hlsl_block_add_instr(block, res); ++ } ++ else ++ { ++ res = slt; ++ } ++ ++ /* We need a REINTERPRET so that the HLSL IR code is valid. SLT and its arguments must be FLOAT, ++ * and casts to BOOL have already been lowered to "!= 0". */ ++ memset(operands, 0, sizeof(operands)); ++ operands[0] = res; ++ if (!(ret = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, ret); ++ ++ return true; ++} ++ ++/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to ++ * CMP instructions (only available in pixel shaders). 
++ * Based on the following equivalence: ++ * SLT(x, y) ++ * = (x < y) ? 1.0 : 0.0 ++ * = ((x - y) >= 0) ? 0.0 : 1.0 ++ * = CMP(x - y, 0.0, 1.0) ++ */ ++static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp; ++ struct hlsl_constant_value zero_value, one_value; ++ struct hlsl_type *float_type; ++ struct hlsl_ir_expr *expr; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ if (expr->op != HLSL_OP2_SLT) ++ return false; ++ ++ arg1 = expr->operands[0].node; ++ arg2 = expr->operands[1].node; ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); ++ ++ if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, arg1_cast); ++ ++ if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, arg2_cast); ++ ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, neg); ++ ++ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) ++ return false; ++ hlsl_block_add_instr(block, sub); ++ ++ memset(&zero_value, 0, sizeof(zero_value)); ++ if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, zero); ++ ++ one_value.u[0].f = 1.0; ++ one_value.u[1].f = 1.0; ++ one_value.u[2].f = 1.0; ++ one_value.u[3].f = 1.0; ++ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, one); ++ ++ if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one))) ++ return false; ++ hlsl_block_add_instr(block, cmp); ++ ++ return true; ++} ++ ++/* Intended to be used for SM1-SM3, lowers CMP instructions (only available in pixel shaders) to ++ * SLT instructions 
(only available in vertex shaders). ++ * Based on the following equivalence: ++ * CMP(x, y, z) ++ * = (x >= 0) ? y : z ++ * = z * ((x < 0) ? 1.0 : 0.0) + y * ((x < 0) ? 0.0 : 1.0) ++ * = z * SLT(x, 0.0) + y * (1 - SLT(x, 0.0)) ++ */ ++static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *args[3], *args_cast[3], *slt, *neg_slt, *sub, *zero, *one, *mul1, *mul2, *add; ++ struct hlsl_constant_value zero_value, one_value; ++ struct hlsl_type *float_type; ++ struct hlsl_ir_expr *expr; ++ unsigned int i; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ if (expr->op != HLSL_OP3_CMP) ++ return false; ++ ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); ++ ++ for (i = 0; i < 3; ++i) ++ { ++ args[i] = expr->operands[i].node; ++ ++ if (!(args_cast[i] = hlsl_new_cast(ctx, args[i], float_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, args_cast[i]); ++ } ++ ++ memset(&zero_value, 0, sizeof(zero_value)); ++ if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, zero); ++ ++ one_value.u[0].f = 1.0; ++ one_value.u[1].f = 1.0; ++ one_value.u[2].f = 1.0; ++ one_value.u[3].f = 1.0; ++ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, one); ++ ++ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, args_cast[0], zero))) ++ return false; ++ hlsl_block_add_instr(block, slt); ++ ++ if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[2], slt))) ++ return false; ++ hlsl_block_add_instr(block, mul1); ++ ++ if (!(neg_slt = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, neg_slt); ++ ++ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, neg_slt))) ++ return false; ++ hlsl_block_add_instr(block, sub); ++ ++ if (!(mul2 
= hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[1], sub))) ++ return false; ++ hlsl_block_add_instr(block, mul2); ++ ++ if (!(add = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, mul1, mul2))) ++ return false; ++ hlsl_block_add_instr(block, add); ++ ++ return true; ++} ++ + static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { + struct hlsl_type *type = instr->data_type, *arg_type; +@@ -5209,6 +5419,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + lower_ir(ctx, lower_round, body); + lower_ir(ctx, lower_ceil, body); + lower_ir(ctx, lower_floor, body); ++ lower_ir(ctx, lower_comparison_operators, body); ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ lower_ir(ctx, lower_slt, body); ++ else ++ lower_ir(ctx, lower_cmp, body); + } + + if (profile->major_version < 2) +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 0dd31af9192..55d1216460f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -1386,10 +1386,9 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + } + } + +-static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parser *parser) ++static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program) + { +- struct io_normaliser normaliser = {parser->program.instructions}; +- struct vsir_program *program = &parser->program; ++ struct io_normaliser normaliser = {program->instructions}; + struct vkd3d_shader_instruction *ins; + bool has_control_point_phase; + unsigned int i, j; +@@ -1671,19 +1670,20 @@ static void remove_dead_code(struct vsir_program *program) + } + } + +-static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser *parser) ++static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_program *program, ++ struct vkd3d_shader_message_context 
*message_context) + { + unsigned int i; + +- for (i = 0; i < parser->program.instructions.count; ++i) ++ for (i = 0; i < program->instructions.count; ++i) + { +- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + struct vkd3d_shader_src_param *srcs; + + switch (ins->handler_idx) + { + case VKD3DSIH_TEX: +- if (!(srcs = shader_src_param_allocator_get(&parser->program.instructions.src_params, 3))) ++ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) + return VKD3D_ERROR_OUT_OF_MEMORY; + memset(srcs, 0, sizeof(*srcs) * 3); + +@@ -1726,7 +1726,7 @@ static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser + case VKD3DSIH_TEXREG2AR: + case VKD3DSIH_TEXREG2GB: + case VKD3DSIH_TEXREG2RGB: +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " + "Combined sampler instruction %#x.", ins->handler_idx); + return VKD3D_ERROR_NOT_IMPLEMENTED; +@@ -1792,10 +1792,10 @@ struct cf_flattener_info + + struct cf_flattener + { +- struct vkd3d_shader_parser *parser; ++ struct vsir_program *program; + + struct vkd3d_shader_location location; +- bool allocation_failed; ++ enum vkd3d_result status; + + struct vkd3d_shader_instruction *instructions; + size_t instruction_capacity; +@@ -1815,13 +1815,20 @@ struct cf_flattener + size_t control_flow_info_size; + }; + ++static void cf_flattener_set_error(struct cf_flattener *flattener, enum vkd3d_result error) ++{ ++ if (flattener->status != VKD3D_OK) ++ return; ++ flattener->status = error; ++} ++ + static struct vkd3d_shader_instruction *cf_flattener_require_space(struct cf_flattener *flattener, size_t count) + { + if (!vkd3d_array_reserve((void **)&flattener->instructions, &flattener->instruction_capacity, + 
flattener->instruction_count + count, sizeof(*flattener->instructions))) + { + ERR("Failed to allocate instructions.\n"); +- flattener->allocation_failed = true; ++ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); + return NULL; + } + return &flattener->instructions[flattener->instruction_count]; +@@ -1853,9 +1860,9 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_ + { + struct vkd3d_shader_src_param *params; + +- if (!(params = vsir_program_get_src_params(&flattener->parser->program, count))) ++ if (!(params = vsir_program_get_src_params(flattener->program, count))) + { +- flattener->allocation_failed = true; ++ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); + return NULL; + } + ins->src = params; +@@ -1869,10 +1876,10 @@ static void cf_flattener_emit_label(struct cf_flattener *flattener, unsigned int + + if (!(ins = cf_flattener_require_space(flattener, 1))) + return; +- if (vsir_instruction_init_label(ins, &flattener->location, label_id, &flattener->parser->program)) ++ if (vsir_instruction_init_label(ins, &flattener->location, label_id, flattener->program)) + ++flattener->instruction_count; + else +- flattener->allocation_failed = true; ++ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); + } + + /* For conditional branches, this returns the false target branch parameter. 
*/ +@@ -1950,7 +1957,7 @@ static struct cf_flattener_info *cf_flattener_push_control_flow_level(struct cf_ + flattener->control_flow_depth + 1, sizeof(*flattener->control_flow_info))) + { + ERR("Failed to allocate control flow info structure.\n"); +- flattener->allocation_failed = true; ++ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); + return NULL; + } + +@@ -2017,12 +2024,12 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla + flattener->block_names[block_id] = buffer.buffer; + } + +-static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener) ++static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener, ++ struct vkd3d_shader_message_context *message_context) + { + bool main_block_open, is_hull_shader, after_declarations_section; +- struct vkd3d_shader_parser *parser = flattener->parser; + struct vkd3d_shader_instruction_array *instructions; +- struct vsir_program *program = &parser->program; ++ struct vsir_program *program = flattener->program; + struct vkd3d_shader_instruction *dst_ins; + size_t i; + +@@ -2074,7 +2081,8 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + break; + + case VKD3DSIH_LABEL: +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ vkd3d_shader_error(message_context, &instruction->location, ++ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: Label instruction."); + return VKD3D_ERROR_NOT_IMPLEMENTED; + +@@ -2239,8 +2247,10 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) + { + WARN("Unexpected src swizzle %#x.\n", src->swizzle); +- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, ++ vkd3d_shader_error(message_context, &instruction->location, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, + "The swizzle for a 
switch case value is not scalar X."); ++ cf_flattener_set_error(flattener, VKD3D_ERROR_INVALID_SHADER); + } + value = *src->reg.u.immconst_u32; + +@@ -2368,21 +2378,18 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + ++flattener->instruction_count; + } + +- return flattener->allocation_failed ? VKD3D_ERROR_OUT_OF_MEMORY : VKD3D_OK; ++ return flattener->status; + } + +-static enum vkd3d_result flatten_control_flow_constructs(struct vkd3d_shader_parser *parser) ++static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) + { +- struct vsir_program *program = &parser->program; +- struct cf_flattener flattener = {0}; ++ struct cf_flattener flattener = {.program = program}; + enum vkd3d_result result; + +- flattener.parser = parser; +- result = cf_flattener_iterate_instruction_array(&flattener); +- +- if (result >= 0) ++ if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0) + { +- vkd3d_free(parser->program.instructions.elements); ++ vkd3d_free(program->instructions.elements); + program->instructions.elements = flattener.instructions; + program->instructions.capacity = flattener.instruction_capacity; + program->instructions.count = flattener.instruction_count; +@@ -3142,6 +3149,93 @@ static bool vsir_block_list_search(struct vsir_block_list *list, struct vsir_blo + return !!bsearch(&block, list->blocks, list->count, sizeof(*list->blocks), block_compare); + } + ++struct vsir_cfg_structure_list ++{ ++ struct vsir_cfg_structure *structures; ++ size_t count, capacity; ++ unsigned int end; ++}; ++ ++struct vsir_cfg_structure ++{ ++ enum vsir_cfg_structure_type ++ { ++ /* Execute a block of the original VSIR program. */ ++ STRUCTURE_TYPE_BLOCK, ++ /* Execute a loop, which is identified by an index. 
*/ ++ STRUCTURE_TYPE_LOOP, ++ /* Execute a `return' or a (possibly) multilevel `break' or ++ * `continue', targeting a loop by its index. If `condition' ++ * is non-NULL, then the jump is conditional (this is ++ * currently not allowed for `return'). */ ++ STRUCTURE_TYPE_JUMP, ++ } type; ++ union ++ { ++ struct vsir_block *block; ++ struct ++ { ++ struct vsir_cfg_structure_list body; ++ unsigned idx; ++ } loop; ++ struct ++ { ++ enum vsir_cfg_jump_type ++ { ++ /* NONE is available as an intermediate value, but it ++ * is not allowed in valid structured programs. */ ++ JUMP_NONE, ++ JUMP_BREAK, ++ JUMP_CONTINUE, ++ JUMP_RET, ++ } type; ++ unsigned int target; ++ struct vkd3d_shader_src_param *condition; ++ bool invert_condition; ++ } jump; ++ } u; ++}; ++ ++static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type); ++static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure); ++ ++static void vsir_cfg_structure_list_cleanup(struct vsir_cfg_structure_list *list) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < list->count; ++i) ++ vsir_cfg_structure_cleanup(&list->structures[i]); ++ vkd3d_free(list->structures); ++} ++ ++static struct vsir_cfg_structure *vsir_cfg_structure_list_append(struct vsir_cfg_structure_list *list, ++ enum vsir_cfg_structure_type type) ++{ ++ struct vsir_cfg_structure *ret; ++ ++ if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + 1, ++ sizeof(*list->structures))) ++ return NULL; ++ ++ ret = &list->structures[list->count++]; ++ ++ vsir_cfg_structure_init(ret, type); ++ ++ return ret; ++} ++ ++static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type) ++{ ++ memset(structure, 0, sizeof(*structure)); ++ structure->type = type; ++} ++ ++static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure) ++{ ++ if (structure->type == STRUCTURE_TYPE_LOOP) ++ 
vsir_cfg_structure_list_cleanup(&structure->u.loop.body); ++} ++ + struct vsir_cfg + { + struct vkd3d_shader_message_context *message_context; +@@ -3187,6 +3281,8 @@ struct vsir_cfg + bool synthetic; + } *loop_intervals; + size_t loop_interval_count, loop_interval_capacity; ++ ++ struct vsir_cfg_structure_list structured_program; + }; + + static void vsir_cfg_cleanup(struct vsir_cfg *cfg) +@@ -3201,6 +3297,8 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) + + vsir_block_list_cleanup(&cfg->order); + ++ vsir_cfg_structure_list_cleanup(&cfg->structured_program); ++ + vkd3d_free(cfg->blocks); + vkd3d_free(cfg->loops); + vkd3d_free(cfg->loops_by_header); +@@ -3288,6 +3386,76 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) + TRACE("}\n"); + } + ++static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list); ++ ++static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure *structure) ++{ ++ switch (structure->type) ++ { ++ case STRUCTURE_TYPE_BLOCK: ++ TRACE("%sblock %u\n", cfg->debug_buffer.buffer, structure->u.block->label); ++ break; ++ ++ case STRUCTURE_TYPE_LOOP: ++ TRACE("%s%u : loop {\n", cfg->debug_buffer.buffer, structure->u.loop.idx); ++ ++ vsir_cfg_structure_list_dump(cfg, &structure->u.loop.body); ++ ++ TRACE("%s} # %u\n", cfg->debug_buffer.buffer, structure->u.loop.idx); ++ break; ++ ++ case STRUCTURE_TYPE_JUMP: ++ { ++ const char *type_str; ++ ++ switch (structure->u.jump.type) ++ { ++ case JUMP_RET: ++ TRACE("%sret\n", cfg->debug_buffer.buffer); ++ return; ++ ++ case JUMP_BREAK: ++ type_str = "break"; ++ break; ++ ++ case JUMP_CONTINUE: ++ type_str = "continue"; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ TRACE("%s%s%s %u\n", cfg->debug_buffer.buffer, type_str, ++ structure->u.jump.condition ? 
"c" : "", structure->u.jump.target); ++ break; ++ } ++ ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) ++{ ++ unsigned int i; ++ ++ vkd3d_string_buffer_printf(&cfg->debug_buffer, " "); ++ ++ for (i = 0; i < list->count; ++i) ++ vsir_cfg_structure_dump(cfg, &list->structures[i]); ++ ++ vkd3d_string_buffer_truncate(&cfg->debug_buffer, cfg->debug_buffer.content_size - 2); ++} ++ ++static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < cfg->structured_program.count; ++i) ++ vsir_cfg_structure_dump(cfg, &cfg->structured_program.structures[i]); ++} ++ + static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) + { +@@ -3916,6 +4084,217 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ + return VKD3D_OK; + } + ++struct vsir_cfg_edge_action ++{ ++ enum vsir_cfg_jump_type jump_type; ++ unsigned int target; ++ struct vsir_block *successor; ++}; ++ ++static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block *block, ++ struct vsir_block *successor, struct vsir_cfg_edge_action *action) ++{ ++ unsigned int i; ++ ++ action->target = UINT_MAX; ++ action->successor = successor; ++ ++ if (successor->order_pos <= block->order_pos) ++ { ++ /* The successor is before the current block, so we have to ++ * use `continue'. The target loop is the innermost that ++ * contains the current block and has the successor as ++ * `continue' target. 
*/ ++ for (i = 0; i < cfg->loop_interval_count; ++i) ++ { ++ struct cfg_loop_interval *interval = &cfg->loop_intervals[i]; ++ ++ if (interval->begin == successor->order_pos && block->order_pos < interval->end) ++ action->target = i; ++ ++ if (interval->begin > successor->order_pos) ++ break; ++ } ++ ++ assert(action->target != UINT_MAX); ++ action->jump_type = JUMP_CONTINUE; ++ } ++ else ++ { ++ /* The successor is after the current block, so we have to use ++ * `break', or possibly just jump to the following block. The ++ * target loop is the outermost that contains the current ++ * block and has the successor as `break' target. */ ++ for (i = 0; i < cfg->loop_interval_count; ++i) ++ { ++ struct cfg_loop_interval *interval = &cfg->loop_intervals[i]; ++ ++ if (interval->begin <= block->order_pos && interval->end == successor->order_pos) ++ { ++ action->target = i; ++ break; ++ } ++ } ++ ++ if (action->target == UINT_MAX) ++ { ++ assert(successor->order_pos == block->order_pos + 1); ++ action->jump_type = JUMP_NONE; ++ } ++ else ++ { ++ action->jump_type = JUMP_BREAK; ++ } ++ } ++} ++ ++static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) ++{ ++ unsigned int i, stack_depth = 1, open_interval_idx = 0; ++ struct vsir_cfg_structure_list **stack = NULL; ++ ++ /* It's enough to allocate up to the maximum interval stacking ++ * depth (plus one for the full program), but this is simpler. */ ++ if (!(stack = vkd3d_calloc(cfg->loop_interval_count + 1, sizeof(*stack)))) ++ goto fail; ++ cfg->structured_program.end = cfg->order.count; ++ stack[0] = &cfg->structured_program; ++ ++ for (i = 0; i < cfg->order.count; ++i) ++ { ++ struct vsir_block *block = cfg->order.blocks[i]; ++ struct vsir_cfg_structure *structure; ++ ++ assert(stack_depth > 0); ++ ++ /* Open loop intervals. 
*/ ++ while (open_interval_idx < cfg->loop_interval_count) ++ { ++ struct cfg_loop_interval *interval = &cfg->loop_intervals[open_interval_idx]; ++ ++ if (interval->begin != i) ++ break; ++ ++ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_LOOP))) ++ goto fail; ++ structure->u.loop.idx = open_interval_idx++; ++ ++ structure->u.loop.body.end = interval->end; ++ stack[stack_depth++] = &structure->u.loop.body; ++ } ++ ++ /* Execute the block. */ ++ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_BLOCK))) ++ goto fail; ++ structure->u.block = block; ++ ++ /* Generate between zero and two jump instructions. */ ++ switch (block->end->handler_idx) ++ { ++ case VKD3DSIH_BRANCH: ++ { ++ struct vsir_cfg_edge_action action_true, action_false; ++ bool invert_condition = false; ++ ++ if (vsir_register_is_label(&block->end->src[0].reg)) ++ { ++ unsigned int target = label_from_src_param(&block->end->src[0]); ++ struct vsir_block *successor = &cfg->blocks[target - 1]; ++ ++ vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); ++ action_false = action_true; ++ } ++ else ++ { ++ unsigned int target = label_from_src_param(&block->end->src[1]); ++ struct vsir_block *successor = &cfg->blocks[target - 1]; ++ ++ vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); ++ ++ target = label_from_src_param(&block->end->src[2]); ++ successor = &cfg->blocks[target - 1]; ++ ++ vsir_cfg_compute_edge_action(cfg, block, successor, &action_false); ++ } ++ ++ /* This will happen if the branch is unconditional, ++ * but also if it's conditional with the same target ++ * in both branches, which can happen in some corner ++ * cases, e.g. when converting switch instructions to ++ * selection ladders. 
*/ ++ if (action_true.successor == action_false.successor) ++ { ++ assert(action_true.jump_type == action_false.jump_type); ++ } ++ else ++ { ++ /* At most one branch can just fall through to the ++ * next block, in which case we make sure it's the ++ * false branch. */ ++ if (action_true.jump_type == JUMP_NONE) ++ { ++ struct vsir_cfg_edge_action tmp = action_true; ++ action_true = action_false; ++ action_false = tmp; ++ invert_condition = true; ++ } ++ ++ assert(action_true.jump_type != JUMP_NONE); ++ ++ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) ++ goto fail; ++ structure->u.jump.type = action_true.jump_type; ++ structure->u.jump.target = action_true.target; ++ structure->u.jump.condition = &block->end->src[0]; ++ structure->u.jump.invert_condition = invert_condition; ++ } ++ ++ if (action_false.jump_type != JUMP_NONE) ++ { ++ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) ++ goto fail; ++ structure->u.jump.type = action_false.jump_type; ++ structure->u.jump.target = action_false.target; ++ } ++ break; ++ } ++ ++ case VKD3DSIH_RET: ++ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) ++ goto fail; ++ structure->u.jump.type = JUMP_RET; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ /* Close loop intervals. 
*/ ++ while (stack_depth > 0) ++ { ++ if (stack[stack_depth - 1]->end != i + 1) ++ break; ++ ++ --stack_depth; ++ } ++ } ++ ++ assert(stack_depth == 0); ++ assert(open_interval_idx == cfg->loop_interval_count); ++ ++ if (TRACE_ON()) ++ vsir_cfg_dump_structured_program(cfg); ++ ++ vkd3d_free(stack); ++ ++ return VKD3D_OK; ++ ++fail: ++ vkd3d_free(stack); ++ ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++} ++ + enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info) + { +@@ -3961,6 +4340,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + return result; + } + ++ if ((result = vsir_cfg_build_structured_program(&cfg)) < 0) ++ { ++ vsir_cfg_cleanup(&cfg); ++ return result; ++ } ++ + if ((result = vsir_program_structurise(program)) < 0) + { + vsir_cfg_cleanup(&cfg); +@@ -3987,7 +4372,7 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + return result; + } + +- if ((result = shader_normalise_io_registers(parser)) < 0) ++ if ((result = vsir_program_normalise_io_registers(program)) < 0) + return result; + + if ((result = instruction_array_normalise_flat_constants(program)) < 0) +@@ -3995,31 +4380,31 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, + + remove_dead_code(program); + +- if ((result = normalise_combined_samplers(parser)) < 0) ++ if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) + return result; + } + +- if ((result = flatten_control_flow_constructs(parser)) < 0) ++ if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) + return result; + + if (TRACE_ON()) + vkd3d_shader_trace(program); + +- if (!parser->failed && (result = vsir_validate(parser)) < 0) ++ if ((result = vsir_program_validate(program, parser->config_flags, ++ compile_info->source_name, message_context)) < 0) + return result; + +- if (parser->failed) +- result = 
VKD3D_ERROR_INVALID_SHADER; +- + return result; + } + + struct validation_context + { +- struct vkd3d_shader_parser *parser; ++ struct vkd3d_shader_message_context *message_context; + const struct vsir_program *program; + size_t instruction_idx; ++ struct vkd3d_shader_location null_location; + bool invalid_instruction_idx; ++ enum vkd3d_result status; + bool dcl_temps_found; + enum vkd3d_shader_opcode phase; + enum cf_type +@@ -4065,16 +4450,21 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c + + if (ctx->invalid_instruction_idx) + { +- vkd3d_shader_parser_error(ctx->parser, error, "%s", buf.buffer); ++ vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); + ERR("VSIR validation error: %s\n", buf.buffer); + } + else + { +- vkd3d_shader_parser_error(ctx->parser, error, "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); ++ const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; ++ vkd3d_shader_error(ctx->message_context, &ins->location, error, ++ "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); + ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); + } + + vkd3d_string_buffer_cleanup(&buf); ++ ++ if (!ctx->status) ++ ctx->status = VKD3D_ERROR_INVALID_SHADER; + } + + static void vsir_validate_src_param(struct validation_context *ctx, +@@ -4128,10 +4518,10 @@ static void vsir_validate_register(struct validation_context *ctx, + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register."); + +- if (reg->idx[0].offset >= ctx->parser->program.temp_count) ++ if (reg->idx[0].offset >= ctx->program->temp_count) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.", +- reg->idx[0].offset, ctx->parser->program.temp_count); ++ reg->idx[0].offset, 
ctx->program->temp_count); + break; + } + +@@ -4321,7 +4711,7 @@ static void vsir_validate_dst_param(struct validation_context *ctx, + switch (dst->reg.type) + { + case VKD3DSPR_SSA: +- if (dst->reg.idx[0].offset < ctx->parser->program.ssa_count) ++ if (dst->reg.idx[0].offset < ctx->program->ssa_count) + { + struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; + +@@ -4374,7 +4764,7 @@ static void vsir_validate_src_param(struct validation_context *ctx, + switch (src->reg.type) + { + case VKD3DSPR_SSA: +- if (src->reg.idx[0].offset < ctx->parser->program.ssa_count) ++ if (src->reg.idx[0].offset < ctx->program->ssa_count) + { + struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; + unsigned int i; +@@ -4465,7 +4855,6 @@ static void vsir_validate_instruction(struct validation_context *ctx) + size_t i; + + instruction = &ctx->program->instructions.elements[ctx->instruction_idx]; +- ctx->parser->location = instruction->location; + + for (i = 0; i < instruction->dst_count; ++i) + vsir_validate_dst_param(ctx, &instruction->dst[i]); +@@ -4816,17 +5205,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) + } + } + +-enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) ++enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, ++ const char *source_name, struct vkd3d_shader_message_context *message_context) + { + struct validation_context ctx = + { +- .parser = parser, +- .program = &parser->program, ++ .message_context = message_context, ++ .program = program, ++ .null_location = {.source_name = source_name}, ++ .status = VKD3D_OK, + .phase = VKD3DSIH_INVALID, + }; + unsigned int i; + +- if (!(parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) ++ if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) + return VKD3D_OK; + + if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) +@@ -4835,7 +5227,7 @@ 
enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) + if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) + goto fail; + +- for (ctx.instruction_idx = 0; ctx.instruction_idx < parser->program.instructions.count; ++ctx.instruction_idx) ++ for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx) + vsir_validate_instruction(&ctx); + + ctx.invalid_instruction_idx = true; +@@ -4860,7 +5252,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) + vkd3d_free(ctx.temps); + vkd3d_free(ctx.ssas); + +- return VKD3D_OK; ++ return ctx.status; + + fail: + vkd3d_free(ctx.blocks); +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 1518afa93be..b4f34c42124 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -6435,20 +6435,26 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + } + + static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, +- const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) ++ const struct vkd3d_shader_register *reg, unsigned int alignment, unsigned int size, ++ unsigned int structure_stride, bool zero_init) + { +- uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id; ++ uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id, init_id; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const SpvStorageClass storage_class = SpvStorageClassWorkgroup; + struct vkd3d_symbol reg_symbol; + ++ /* Alignment is supported only in the Kernel execution model. 
*/ ++ if (alignment) ++ TRACE("Ignoring alignment %u.\n", alignment); ++ + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + length_id = spirv_compiler_get_constant_uint(compiler, size); + array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); + + pointer_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, array_type_id); ++ init_id = zero_init ? vkd3d_spirv_get_op_constant_null(builder, array_type_id) : 0; + var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, +- pointer_type_id, storage_class, 0); ++ pointer_type_id, storage_class, init_id); + + spirv_compiler_emit_register_debug_name(builder, var_id, reg); + +@@ -6463,8 +6469,8 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { + const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw; +- spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, +- tgsm_raw->byte_count / 4, 0); ++ spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, tgsm_raw->alignment, ++ tgsm_raw->byte_count / 4, 0, tgsm_raw->zero_init); + } + + static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compiler, +@@ -6472,8 +6478,8 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi + { + const struct vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured; + unsigned int stride = tgsm_structured->byte_stride / 4; +- spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, +- tgsm_structured->structure_count * stride, stride); ++ spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, tgsm_structured->alignment, ++ tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); + } + + static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, +diff --git 
a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index bd558693b07..cb4f6d4ddbf 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -1263,6 +1263,7 @@ static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, u + ins->declaration.tgsm_raw.byte_count = *tokens; + if (ins->declaration.tgsm_raw.byte_count % 4) + FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); ++ ins->declaration.tgsm_raw.zero_init = false; + } + + static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1274,6 +1275,7 @@ static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction + ins->declaration.tgsm_structured.structure_count = *tokens; + if (ins->declaration.tgsm_structured.byte_stride % 4) + FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); ++ ins->declaration.tgsm_structured.zero_init = false; + } + + static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -2724,7 +2726,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + shader_sm4_validate_default_phase_index_ranges(sm4); + + if (!sm4->p.failed) +- vsir_validate(&sm4->p); ++ vkd3d_shader_parser_validate(&sm4->p); + + if (sm4->p.failed) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index 1ac372f163c..81ac84896d4 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -73,8 +73,16 @@ void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer) + + void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer) + { +- buffer->buffer[0] = '\0'; +- buffer->content_size = 0; ++ vkd3d_string_buffer_truncate(buffer, 0); ++} ++ ++void vkd3d_string_buffer_truncate(struct 
vkd3d_string_buffer *buffer, size_t size) ++{ ++ if (size < buffer->content_size) ++ { ++ buffer->buffer[size] = '\0'; ++ buffer->content_size = size; ++ } + } + + static bool vkd3d_string_buffer_resize(struct vkd3d_string_buffer *buffer, int rc) +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 98c311b3655..7503d564af0 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -814,6 +814,8 @@ enum vkd3d_shader_type + VKD3D_SHADER_TYPE_COUNT, + }; + ++struct vkd3d_shader_message_context; ++ + struct vkd3d_shader_version + { + enum vkd3d_shader_type type; +@@ -1088,14 +1090,18 @@ struct vkd3d_shader_tgsm + struct vkd3d_shader_tgsm_raw + { + struct vkd3d_shader_dst_param reg; ++ unsigned int alignment; + unsigned int byte_count; ++ bool zero_init; + }; + + struct vkd3d_shader_tgsm_structured + { + struct vkd3d_shader_dst_param reg; ++ unsigned int alignment; + unsigned int byte_stride; + unsigned int structure_count; ++ bool zero_init; + }; + + struct vkd3d_shader_thread_group_size +@@ -1315,6 +1321,8 @@ struct vsir_program + + bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); + void vsir_program_cleanup(struct vsir_program *program); ++enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, ++ const char *source_name, struct vkd3d_shader_message_context *message_context); + + static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( + struct vsir_program *program, unsigned int count) +@@ -1360,6 +1368,12 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse + parser->ops->parser_destroy(parser); + } + ++static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser) ++{ ++ return vsir_program_validate(&parser->program, 
parser->config_flags, ++ parser->location.source_name, parser->message_context); ++} ++ + struct vkd3d_shader_descriptor_info1 + { + enum vkd3d_shader_descriptor_type type; +@@ -1413,6 +1427,7 @@ void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); + void vkd3d_string_buffer_cache_cleanup(struct vkd3d_string_buffer_cache *list); + void vkd3d_string_buffer_cache_init(struct vkd3d_string_buffer_cache *list); + void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer); ++void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size); + int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f); + int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d); + int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *format, ...) VKD3D_PRINTF_FUNC(2, 3); +@@ -1522,8 +1537,6 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, + int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + +-enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser); +- + static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_type( + enum vkd3d_data_type data_type) + { +-- +2.43.0 +